《搜索引擎零距离》IRQL语言的解析

roki
浏览: 62098 次
性别:
来自: 上海
最近访客更多访客>>

wind2412
lanyi921
hadasione
whx2010nj
博主相关

博客
微博
相册
留言
关于我
文章分类

社区版块

存档分类

搜索引擎 F#EXT DAO J#
package com.rayeen.spider.vertical.util;

import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import com.rayeen.spider.vertical.ParalleIRVirtualMachine;
import com.rayeen.spider.vertical.algorithm.AddFunction;
import com.rayeen.spider.vertical.algorithm.ClearTagFunction;
import com.rayeen.spider.vertical.algorithm.DoubleParameterFunction;
import com.rayeen.spider.vertical.algorithm.EqNullOperator;
import com.rayeen.spider.vertical.algorithm.EqOperator;
import com.rayeen.spider.vertical.algorithm.EqStringOperator;
import com.rayeen.spider.vertical.algorithm.FullUrlFunction;
import com.rayeen.spider.vertical.algorithm.Function;
import com.rayeen.spider.vertical.algorithm.MaxlengthFunction;
import com.rayeen.spider.vertical.algorithm.Operator;
import com.rayeen.spider.vertical.algorithm.RecursiveFunction;
import com.rayeen.spider.vertical.algorithm.ReplaceFunction;
import com.rayeen.spider.vertical.algorithm.SprintfFunction;
import com.rayeen.spider.vertical.algorithm.UneqNullOperator;
import com.rayeen.spider.vertical.algorithm.UneqOperator;
import com.rayeen.spider.vertical.algorithm.UneqStringOperator;
import com.rayeen.spider.vertical.algorithm.UniParameterFunction;
import com.rayeen.spider.vertical.auxiliary.CrawlResultSetCollection;
import com.rayeen.spider.vertical.auxiliary.SemanticException;
import com.rayeen.spider.vertical.auxiliary.TableMerge;
import com.rayeen.spider.vertical.constant.ArgumentType;
import com.rayeen.spider.vertical.constant.ConfConstant;
import com.rayeen.spider.vertical.constant.ErrorType;
import com.rayeen.spider.vertical.constant.FunctionConstant;

public class ResutTree {

	// Class-wide log4j logger.
	static final Logger LOG = Logger.getLogger(ResutTree.class);

	// Registry of prototype Function objects keyed by IRQL function name.
	// It is filled by a static initializer further down in this class.
	static Map<String, Function> FunctionNameMap = new ConcurrentHashMap<String, Function>();

	// Names of the functions that take a single parameter.
	static Set<String> uniParameterFunction = new HashSet<String>();

	// Names of the functions that take two parameters.
	static Set<String> doubleParameterFunction = new HashSet<String>();

	static {
		uniParameterFunction.add(FunctionConstant.FULL_URL);
		uniParameterFunction.add(FunctionConstant.CLEAR_TAG);

		doubleParameterFunction.add(FunctionConstant.MAX_LENGTH);
		doubleParameterFunction.add(FunctionConstant.ADD);

	}

	/**
	 * Creates a new instance of the function registered under the given name.
	 * <p>
	 * The registry stores one prototype object per name; this method
	 * instantiates the prototype's class through its no-argument constructor.
	 * 
	 * @param func
	 *            function name to look up in {@code FunctionNameMap}
	 * @return a new instance, or {@code null} when the name is {@code null} or
	 *         not registered, or when the class cannot be instantiated
	 */
	static Function getFunctionInstance(String func) {

		// ConcurrentHashMap rejects null keys, so guard before the lookup.
		Function prototype = (null == func) ? null : FunctionNameMap.get(func);
		if (null == prototype) {
			LOG.error("unregistered function name:" + func);
			return null;
		}

		Class<? extends Function> cls = prototype.getClass();
		try {
			return cls.newInstance();
		} catch (InstantiationException e) {
			LOG.error("cannot instantiate function:" + func, e);
		} catch (IllegalAccessException e) {
			LOG.error("cannot access constructor of function:" + func, e);
		}

		return null;
	}

	// Matches a function-style select item "name(args) showField".
	// group(1) = function name, group(2) = raw argument text,
	// group(3) = show field, which may carry a ":suffix" (e.g. showF:toByte).
	static Pattern FUNC_PATTERN = Pattern
			.compile("(\\w+)\\s*\\((.+?)\\)\\s+(\\w+(:\\w+)?)");

	// Not referenced by any code visible in this class.
	Map<String, Function> fieldFunctionMap = new ConcurrentHashMap<String, Function>();

	// Show field -> function that computes it; filled by getMapResult and read
	// by filterFinalResult.
	Map<String, Function> showFieldFunctionMap = new ConcurrentHashMap<String, Function>();

	// Where-clause operators keyed by their token. Insertion order matters:
	// getMapResult picks the first token contained in the condition text, so
	// "!=" has to be tried before "=".
	static Map<String, Operator> operators = new LinkedHashMap<String, Operator>();
	static {
		operators.put("!=", new UneqOperator());
		operators.put("=", new EqOperator());
		operators.put("is", new EqNullOperator());
		operators.put("not", new UneqNullOperator());

		// Prototype instances for every function name the parser recognises.
		FunctionNameMap.put(FunctionConstant.FULL_URL, new FullUrlFunction());
		FunctionNameMap.put(FunctionConstant.SPRINGTF, new SprintfFunction());
		FunctionNameMap
				.put(FunctionConstant.RECURSIVE, new RecursiveFunction());
		FunctionNameMap.put(FunctionConstant.ADD, new AddFunction());
		FunctionNameMap.put(FunctionConstant.REPLACE, new ReplaceFunction());
		FunctionNameMap.put(FunctionConstant.CLEAR_TAG, new ClearTagFunction());
		FunctionNameMap.put(FunctionConstant.MAX_LENGTH,
				new MaxlengthFunction());

	}

	/**
	 * Creates a result tree bound to one crawl.
	 * 
	 * @param rootUrl
	 *            URL handed to the full-url function when rows are rendered
	 * @param hierarchyResult
	 *            single-row tables looked up by table name for "->" fields
	 * @param crawlRSC
	 *            collection queried for the shared and per-page result tables
	 */
	public ResutTree(URL rootUrl,
			Map<String, Map<String, String>> hierarchyResult,
			CrawlResultSetCollection crawlRSC) {

		// Note kept from the original author: entries of curUniResultTableMap
		// are keyed as "tableName->extractName".
		this.crawlRSC = crawlRSC;
		this.hierarchyResult = hierarchyResult;
		this.rootUrl = rootUrl;
	}

	/**
	 * Logs the message at error level and aborts by throwing.
	 * 
	 * @param str
	 *            description of the problem
	 * @throws SemanticException
	 *             always, carrying {@code str}
	 */
	static void error(String str) throws SemanticException {
		final String message = str;
		LOG.error(message);
		throw new SemanticException(message);
	}

	/**
	 * Logs a non-fatal problem; unlike {@code error} it does not throw.
	 * 
	 * @param str
	 *            description of the problem
	 */
	static void warn(String str) {
		// Previously logged at ERROR level, which made warnings
		// indistinguishable from fatal errors in the log.
		LOG.warn(str);
	}

	// URL passed to FullUrlFunction when function fields are evaluated.
	URL rootUrl;

	// Hierarchy tables: table name -> its single row (field name -> value).
	Map<String, Map<String, String>> hierarchyResult = new HashMap<String, Map<String, String>>();

	// Source of the shared and per-page ("page->rule") result tables.
	CrawlResultSetCollection crawlRSC;

	// Merge definitions keyed by target table name; replaceable via setMergeMap.
	Map<String, TableMerge> mergeMap = new ConcurrentHashMap();

	/**
	 * One selected column: page alias, field name and display key.
	 */
	class Pfk {
		/** Page alias (the table name as written in the query). */
		String p;

		/** Field name inside the table. */
		String f;

		/** Display key under which the value is shown. */
		String k;

		/**
		 * @param p
		 *            page alias
		 * @param f
		 *            field name
		 * @param k
		 *            display key; when empty the field name is used instead
		 */
		public Pfk(String p, String f, String k) {
			super();
			this.p = p;
			this.f = f;
			this.k = StringUtils.isEmpty(k) ? f : k;
		}

		/** Renders the triple as "p:f:k". */
		public String toString() {
			StringBuilder text = new StringBuilder();
			text.append(p).append(":").append(f).append(":").append(k);
			return text.toString();
		}
	}

	/**
	 * One where-clause predicate: an operator, the number of operands it
	 * takes, and the operand list.
	 */
	class Pkpk {

		ArgumentType argumentType;

		Operator operator;

		List<String> argList;

		/**
		 * How {@code argList} is interpreted depends on the argument type and
		 * on the operator.
		 * 
		 * @param argmentType
		 *            number of operands the predicate takes
		 * @param operator
		 *            comparison to apply
		 * @param argList
		 *            operand list
		 */
		public Pkpk(ArgumentType argmentType, Operator operator,
				List<String> argList) {
			this.argList = argList;
			this.operator = operator;
			this.argumentType = argmentType;
		}

		public ArgumentType getArgmentType() {
			return argumentType;
		}

		public void setArgmentType(ArgumentType argmentType) {
			this.argumentType = argmentType;
		}

		public Operator getOperator() {
			return operator;
		}

		public void setOperator(Operator operator) {
			this.operator = operator;
		}

		public List<String> getArgList() {
			return argList;
		}

		public void setArgList(List<String> argList) {
			this.argList = argList;
		}

		/** Renders the predicate as "argumentType:operator:argList". */
		public String toString() {
			StringBuilder text = new StringBuilder();
			text.append(argumentType).append(":");
			text.append(operator).append(":");
			text.append(argList.toString());
			return text.toString();
		}

	}

	/**
	 * Applies the where-predicates to every joined row and projects each
	 * surviving row onto its display keys, then adds the values computed by
	 * the functions registered in {@code showFieldFunctionMap}.
	 * 
	 * @param mainRows
	 *            joined rows keyed by "alias.field"
	 * @param pfkList
	 *            selected columns of ordinary tables
	 * @param hierarchyPfkList
	 *            selected columns of hierarchy tables
	 * @param pkpkList
	 *            where-clause predicates; a row is kept only if all hold
	 * @return one map per surviving row, keyed by display key
	 * @throws SemanticException
	 *             when the duplicate-key check below fires
	 */
	private List<Map> filterFinalResult(List<Map<String, String>> mainRows,
			List<Pfk> pfkList, ArrayList<Pfk> hierarchyPfkList,
			List<Pkpk> pkpkList) throws SemanticException {

		// Row key "alias.field" -> display key.
		// NOTE(review): the duplicate check looks up the display key (tmp.k)
		// among keys of the form "alias.field", so it can only fire when a
		// display key happens to equal such a key - confirm the intent.
		Map<String, String> fieldMap = new HashMap<String, String>();
		for (Pfk tmp : pfkList) {
			if (fieldMap.containsKey(tmp.k)) {
				error("duplicate show key :" + tmp.k);
			} else {
				fieldMap.put(tmp.p + "." + tmp.f, tmp.k);
			}
		}

		for (Pfk tmp : hierarchyPfkList) {
			if (fieldMap.containsKey(tmp.k)) {
				error("duplicate show key :" + tmp.k);
			} else {
				fieldMap.put(tmp.p + "." + tmp.f, tmp.k);
			}
		}

		List<Map> resultList = new ArrayList<Map>();
		for (Map<String, String> res : mainRows) {

			// A row fits only if every predicate evaluates to ConfConstant.TRUE.
			boolean fit = true;
			for (Pkpk p : pkpkList) {

				// Single-operand predicate: args are {alias, field, ...}.
				if (p.getArgmentType() == ArgumentType.ONE) {
					String p1 = p.getArgList().get(0);
					String f1 = p.getArgList().get(1);
					if (!p.getOperator().operator(res.get(p1 + "." + f1))
							.equals(ConfConstant.TRUE)) {
						fit = false;
						break;
					}
				}

				// Two-operand predicate: args are {alias1, field1, alias2, field2}.
				if (p.getArgmentType() == ArgumentType.TWO) {
					String p1 = p.getArgList().get(0);
					String f1 = p.getArgList().get(1);

					String p2 = p.getArgList().get(2);
					String f2 = p.getArgList().get(3);

					if (!p.getOperator().operator(res.get(p1 + "." + f1),
							res.get(p2 + "." + f2)).equals(ConfConstant.TRUE)) {
						fit = false;
						break;
					}
				}

			}


			if (fit) {

				// Copy the plain selected columns under their display keys.
				// Entries whose display key ends with ":FUNCTION" are skipped.
				Map<String, String> tmpMap = new HashMap<String, String>();
				for (String key : res.keySet()) {
					if (fieldMap.containsKey(key) &&  !fieldMap.get(key).endsWith(":FUNCTION")) {

						String value = res.get(key);

						tmpMap.put(fieldMap.get(key), value);
					}
				}

				// Add the fields produced by functions.
				for (String key : showFieldFunctionMap.keySet()) {
					Function func = showFieldFunctionMap.get(key);
					String fieldValue = "";

					if (func instanceof FullUrlFunction) {
						// Called with {rootUrl, raw field value}.
						String field = ((FullUrlFunction) func).getField();
						fieldValue = res.get(field);
						// tmpMap.remove(ParseUtils.parseFieldShowValue(field));
						fieldValue = func.operator(new Object[] { rootUrl,
								fieldValue });

					} else if (func instanceof SprintfFunction) {
						// Called with {format, value of each field in order}.
						SprintfFunction sf = (SprintfFunction) func;
						List<String> fields = sf.getFields();
						List<String> args = new ArrayList<String>();
						args.add(sf.getFormat());
						for (String f : fields) {
							args.add(res.get(f));
							// tmpMap.remove(ParseUtils.parseFieldShowValue(f));
						}
						fieldValue = sf.operator(args.toArray());

					} else if (func instanceof ReplaceFunction) {
						// Called with {field value, pattern, replacement}.
						ReplaceFunction rpf = (ReplaceFunction) func;
						String field = res.get(rpf.getField());
						fieldValue = rpf.operator(new Object[] { field,
								rpf.getPatternStr(), rpf.getReplaceStr() });

						// tmpMap.remove(ParseUtils.parseFieldShowValue(addf.getField()));
					} else if (func instanceof ClearTagFunction) {
						// Called with {field value}.
						ClearTagFunction ctf = (ClearTagFunction) func;
						String value = res.get(ctf.getField());
						fieldValue = ctf.operator(new Object[] { value });

						// tmpMap.remove(ParseUtils.parseFieldShowValue(addf.getField()));
					} else if (func instanceof DoubleParameterFunction) {
						// The more general types are tried later in the chain.
						// Plain-form functions, including Add and Maxlength.
						// NOTE(review): "parameter" is read but never passed
						// to operator(); presumably the function uses the
						// value stored via setParameter - confirm.
						String field = ((DoubleParameterFunction) func)
								.getField();
						String parameter = ((DoubleParameterFunction) func)
								.getParameter();
						fieldValue = res.get(field);
						fieldValue = func.operator(new Object[] { fieldValue });

					} else if (func instanceof RecursiveFunction) {

						// Called with {function list, then one value per parameter}.
						RecursiveFunction rf = (RecursiveFunction) func;
						List<String> args = new ArrayList<String>();

						String functions = rf.getFunctions();
						args.add(functions);

						String[] params = rf.getParams();

						for (String f : params) {
							// A parameter not starting with a quote is a field
							// name: pass the field's value from the row.
							if (!f.startsWith("\"")) {
								args.add(res.get(f));
							} else { // Otherwise pass the parsed string content.
								args.add(ParseUtils.parseStrContent(f));
							}
							// tmpMap.remove(ParseUtils.parseFieldShowValue(f));
						}
						fieldValue = rf.operator(args.toArray());

					}

					// fieldValue=showFieldFunctionMap.get(key).operator(new
					// Object[]{value});
					// A function of an unhandled type leaves the empty string.
					tmpMap.put(key, fieldValue);
				}

				resultList.add(tmpMap);
			}

		}
		return resultList;

	}

	/**
	 * Builds the joined "main rows" for a parsed query and hands them to
	 * {@code filterFinalResult}.
	 * <p>
	 * Steps: sort the selected columns by page alias, collect the columns
	 * referenced by where-predicates, load and join the tables from left to
	 * right, append merged tables, copy hierarchy values into every row, then
	 * filter and project.
	 * 
	 * @param pageNameMap
	 *            page alias -> table name
	 * @param pfkList
	 *            selected columns of ordinary tables; sorted in place
	 * @param hierarchyPfkList
	 *            selected columns of hierarchy tables
	 * @param pkpkList
	 *            where-clause predicates
	 * @return the filtered rows keyed by display key
	 * @throws SemanticException
	 *             on an unknown hierarchy table or a failure in filtering
	 */
	@SuppressWarnings("unchecked")
	private List<Map> getFinalResult(Map<String, String> pageNameMap,
			List<Pfk> pfkList, ArrayList<Pfk> hierarchyPfkList,
			List<Pkpk> pkpkList) throws SemanticException {

		long threadId = Thread.currentThread().getId();

		// Sort the selected columns by page alias so that columns of the same
		// table are adjacent.
		Collections.sort(pfkList, new Comparator() {

			public int compare(Object o1, Object o2) {
				Pfk p1 = (Pfk) o1;
				Pfk p2 = (Pfk) o2;
				return p1.p.compareToIgnoreCase(p2.p);
			}

		});

		// Page alias -> fields of that table that appear in the select list.
		Map<String, Set<String>> pkfMap = new HashMap();
		for (Pfk tmp : pfkList) {
			if (!pkfMap.containsKey(tmp.p)) {
				Set<String> set = new HashSet();
				set.add(tmp.f);
				pkfMap.put(tmp.p, set);
			} else {
				pkfMap.get(tmp.p).add(tmp.f);
			}

		}

		// Page alias -> fields of that table referenced by where-predicates.
		// The set may stay empty.
		Map<String, Set<String>> pkpkMap = new HashMap();

		for (Pkpk tmp : pkpkList) {

			List<String> argList = tmp.getArgList();

			// Number of (alias, field) operand pairs at the front of argList.
			// The two-operand case used to be guarded by "size() == 2" while
			// reading indices 2 and 3, so it never ran for real predicates
			// and columns used only in a join condition were never loaded.
			int operands = 0;
			if (tmp.getArgmentType() == ArgumentType.ONE
					&& argList.size() >= 2) {
				operands = 1;
			} else if (tmp.getArgmentType() == ArgumentType.TWO
					&& argList.size() >= 4) {
				operands = 2;
			}

			for (int i = 0; i < operands; i++) {
				String page = argList.get(2 * i);
				String field = argList.get(2 * i + 1);

				// Create the set first ...
				if (!pkpkMap.containsKey(page)) {
					Set<String> set = new HashSet();
					pkpkMap.put(page, set);
				}
				// ... then add the field, but only if the table also
				// appears in the select list.
				if (pkfMap.containsKey(page)) {
					pkpkMap.get(page).add(field);
				}
			}

		}

		String curTableName = "";
		String exTableName = "";
		// Follow the select list, e.g.
		// "select P1.bcname bcname,P2.scame scame,P3.songname
		// songname,P3.downlink downlink", and join from left to right.

		List<Map<String, String>> mainRows = new ArrayList();

		for (Pfk pfk : pfkList) {

			// Resolve the table name behind the alias.

			if (!pageNameMap.containsKey(pfk.p)) {
				ParalleIRVirtualMachine.error("invalid pagename:" + pfk.p,
						ErrorType.SEMANTIC);
			}
			String tableName = pageNameMap.get(pfk.p);

			// Merged tables are handled after this loop.
			if (mergeMap.containsKey(tableName)) {
				continue;
			}

			if (null == tableName) {
				ParalleIRVirtualMachine.error("invalid page alias" + pfk.p,
						ErrorType.SEMANTIC);
			}

			curTableName = tableName;

			List<Map<String, String>> rows = new ArrayList();

			// If the data is not in the shared tables, look in the per-page
			// tables. A name such as "downloadPage->down" can never be found
			// in the shared tables; testing tableName.contains("->") would be
			// the cleaner way to write this.
			if (crawlRSC.getGlobalShareResultTableMap(tableName).size() == 0) {

				// pageName->ruleName
				String[] prPair = tableName.split("->");
				if (prPair.length == 2) {

					String pageName = prPair[0];
					String ruleName = prPair[1];
					// Neither the shared nor the per-page collection has it.
					if (crawlRSC.getGlobalUniResultTableMap(pageName, ruleName)
							.size() == 0) {
						warn("Thread-" + threadId
								+ ":invalid uniTable pagename:" + pageName
								+ "->" + ruleName);
						continue;
					} else {

						rows = crawlRSC.getGlobalUniResultTableMap(pageName,
								ruleName);
					}
				} else {
					warn("invalid pagename:" + tableName + " or match failed");
				}
			} else {
				// The data is in the shared tables.
				rows = crawlRSC.getGlobalShareResultTableMap(tableName);
			}

			// NOTE(review): an empty table stops the whole join loop rather
			// than only skipping this column - confirm this is intended.
			if (rows.size() == 0) {
				break;
			}

			// A new table starts: cross-join it with what we have so far.
			if (StringUtils.isNotEmpty(exTableName)
					&& !StringUtils.equalsIgnoreCase(curTableName, exTableName)) {

				List<Map<String, String>> tmpRows = new ArrayList();
				for (Map<String, String> result : mainRows) {
					// New record = left-hand row + one right-hand row.

					for (Map<String, String> map : rows) {
						Map<String, String> tpMap = new HashMap();
						tpMap.putAll(result);

						// Column name with the function postfix stripped, as
						// in the two sibling branches below.
						String fld = pfk.f;
						if (fld.endsWith(ConfConstant.FUNCTOIN_POSTFIX)) {
							fld = fld.substring(0, fld.length()
									- ConfConstant.FUNCTOIN_POSTFIX.length());
						}

						// Legacy lookup key: the display key with a trailing
						// TO_BYTE marker removed.
						String key = pfk.k;
						if (pfk.k.endsWith(ConfConstant.TO_BYTE)) {
							int ix = pfk.k.lastIndexOf(ConfConstant.TO_BYTE);
							key = key.substring(0, ix);
						}

						// Read by column name; the display key alone returned
						// null for aliased columns ("P2.scname alias"). The
						// legacy key stays as a fallback.
						String value = map.containsKey(fld) ? map.get(fld)
								: map.get(key);
						tpMap.put(pfk.p + "." + pfk.f, value);

						// Columns needed by where-predicates (may be none).
						if (pkpkMap.containsKey(pfk.p)) {
							Set<String> ext = pkpkMap.get(pfk.p);
							for (String f : ext) {
								tpMap.put(pfk.p + "." + f, map.get(f));
							}
						}
						// Add the row to the temporary table.
						tmpRows.add(tpMap);
					}

				}

				mainRows = tmpRows;// cartesian product done

			} else {// another column of the same table

				if (StringUtils.isEmpty(exTableName)) {

					// Very first table: one main row per source row.
					for (Map<String, String> map : rows) {
						Map<String, String> tpMap = new HashMap();

						String fld = pfk.f;
						if (fld.endsWith(ConfConstant.FUNCTOIN_POSTFIX)) {
							fld = fld.substring(0, fld.length()
									- ConfConstant.FUNCTOIN_POSTFIX.length());
						}

						tpMap.put(pfk.p + "." + pfk.f, map.get(fld));

						// Columns needed by where-predicates (may be none).
						if (pkpkMap.containsKey(pfk.p)) {
							Set<String> ext = pkpkMap.get(pfk.p);
							for (String f : ext) {
								tpMap.put(pfk.p + "." + f, map.get(f));
							}
						}

						mainRows.add(tpMap);
					}
				} else {

					// Same table as the previous column: fill the new column
					// into the existing rows, block by block.
					// NOTE(review): assumes mainRows.size() is a multiple of
					// rows.size(); otherwise get(i + j) is out of range.
					for (int i = 0; i < mainRows.size(); i += rows.size()) {

						// Strip the function postfix before reading the row.
						String fld = pfk.f;
						if (fld.endsWith(ConfConstant.FUNCTOIN_POSTFIX)) {
							fld = fld.substring(0, fld.length()
									- ConfConstant.FUNCTOIN_POSTFIX.length());
						}

						String pf = pfk.p + "." + pfk.f;
						for (int j = 0; j < rows.size(); j++) {
							mainRows.get(i + j).put(pf, rows.get(j).get(fld));

							if (pkpkMap.containsKey(pfk.p)) {
								Set<String> ext = pkpkMap.get(pfk.p);
								for (String f : ext) {
									mainRows.get(i + j).put(pfk.p + "." + f,
											rows.get(j).get(f));
								}
							}
						}
					}

				}

			}

			exTableName = tableName;

		}

		// Merged tables are processed after all ordinary tables.
		for (String targetKey : mergeMap.keySet()) {

			// Find the alias that maps to this merge target.
			String p = "";
			for (String pkey : pageNameMap.keySet()) {
				if (pageNameMap.get(pkey).equals(targetKey)) {
					p = pkey;
					break;
				}
			}
			if (StringUtils.isEmpty(p)) {
				ParalleIRVirtualMachine.error(
						"invalid page alias:" + targetKey, ErrorType.SEMANTIC);
			}

			if (StringUtils.isEmpty(targetKey))
				continue;

			TableMerge merge = mergeMap.get(targetKey);
			Set<String> srcTbls = merge.getMergedTable();

			List<Map<String, String>> mainMergeRows = new ArrayList();

			// mainRow.add(new HashMap());

			for (String tableName : srcTbls) {

				List<Map<String, String>> rows = new ArrayList();

				String[] prPair = tableName.split("->");

				if (prPair.length == 2) { // per-page table
					String pageName = prPair[0].trim();
					String ruleName = prPair[1].trim();

					if (crawlRSC.getGlobalUniResultTableMap(pageName, ruleName)
							.size() == 0) {
						warn("Thread-" + threadId
								+ ":invalid uniTable pagename:" + pageName
								+ "->" + ruleName);
						continue;
					} else {
						rows = crawlRSC.getGlobalUniResultTableMap(pageName,
								ruleName);

						if (mainRows.size() == 0) {

							// Nothing joined yet: the source rows become the
							// main rows, keyed as "alias.column".
							List<Map<String, String>> tmpRows = new ArrayList();

							for (Map<String, String> row : rows) {
								Map tmpMap = new HashMap();
								for (String columName : row.keySet()) {
									tmpMap.put(p + "." + columName, row
											.get(columName));
								}
								tmpRows.add(tmpMap);
							}
							mainRows = tmpRows;
						} else {

							// Cross-join, e.g. 2 x 2 => 4 rows.
							List<Map<String, String>> tmpRows = new ArrayList();

							for (Map<String, String> mainColumn : mainRows) {

								for (Map<String, String> row : rows) {
									Map<String, String> tmpMap = new HashMap();
									for (String columName : row.keySet()) {
										tmpMap.put(p + "." + columName, row
												.get(columName));
									}
									tmpMap.putAll(mainColumn);
									tmpRows.add(tmpMap);
								}
							}
							mainRows = tmpRows;
						}

						// }
					}
				} else {// shared table

					String pageName = tableName.trim();
					if (crawlRSC.getGlobalShareResultTableMap(pageName).size() == 0) {
						warn("Thread-" + threadId + ":invalid  pagename:"
								+ pageName);
						continue;
					} else {
						rows = crawlRSC.getGlobalShareResultTableMap(pageName);

						// NOTE(review): this branch reuses the collection's
						// own list and copies every source row into every
						// main row without the alias prefix - confirm.
						if (mainRows.size() == 0) {
							mainRows = rows;
						} else {
							for (Map<String, String> mainColumn : mainRows) {
								for (Map<String, String> column : rows) {
									mainColumn.putAll(column);
								}
							}
						}

					}
				}
			}

		}

		// Hierarchy columns are copied straight from hierarchyResult.
		for (Pfk pfk : hierarchyPfkList) {

			// Resolve the table name behind the alias.
			String tableName = pageNameMap.get(pfk.p);

			curTableName = tableName;

			if (!hierarchyResult.containsKey(tableName)) {
				String err = "invalid hierarchy tableName:" + tableName;
				err += ",\n if u want to specify a  hierarchy tableName, u needn't to write the matchName";
				err += ",\n because only one matchName can be used as a hierarchyTable";
				error(err);
				continue;
			}
			Map<String, String> row = hierarchyResult.get(tableName);

			// Hierarchy fields are also keyed as "alias.field" for
			// uniformity; function-type fields have to take that into account.
			for (int i = 0; i < mainRows.size(); i++) {
				String pf = pfk.p + "." + pfk.f;
				mainRows.get(i).put(pf, row.get(pfk.f));
			}

		}

		return filterFinalResult(mainRows, pfkList, hierarchyPfkList, pkpkList);

	}

	/**
	 * Parses an IRQL statement and returns the rows it selects.
	 * <p>
	 * Expected shape:
	 * {@code table:alias[,table:alias...]; select items [where conditions]}.
	 * Select items are either function calls ({@code name(args) showField})
	 * or plain columns ({@code alias.field [showField]}, or
	 * {@code alias->field [showField]} for hierarchy tables). Conditions are
	 * joined by the keyword {@code and}.
	 * 
	 * @param irql
	 *            the statement, without the trailing DAO part
	 * @return one map per result row, keyed by display key
	 * @throws SemanticException
	 *             when the statement is malformed
	 */
	public List<Map> getMapResult(String irql) throws SemanticException {

		// Everything before the first ';' is the page definition.
		int pm = irql.indexOf(";");
		if (-1 == pm) {
			String err = "invalid IRQL format:" + irql;
			err += "\nhave u forget to put ';' after Page Define?";
			ParalleIRVirtualMachine.error(err, ErrorType.SEMANTIC);
		}
		String pageStr = irql.substring(0, pm);

		// alias -> table name
		Map<String, String> pageNameMap = new HashMap<String, String>();

		String[] fields = pageStr.split(",");
		for (String field : fields) {
			String[] kv = field.split(":");
			if (kv.length == 2) {
				pageNameMap.put(kv[1].trim(), kv[0].trim());
			} else {
				error("error pageMap description:" + field);
			}
		}

		// prefix = select part, postfix = where part (including the keyword).
		String prefix = "";
		String postfix = "";
		int w = irql.indexOf("where");
		if (w != -1) {
			prefix = irql.substring(pm + 1, w).trim();
			postfix = irql.substring(w);
		} else {
			prefix = irql;
		}

		int s = prefix.indexOf("select");
		if (s == -1) {
			ParalleIRVirtualMachine.error("miss 'select'", ErrorType.GRAMMER);
		}
		s = s + "select".length();

		String fieldStr = prefix.substring(s).trim();

		// Raw field references that occur as function arguments.
		Set<String> functionalFields = new HashSet();

		Matcher m = FUNC_PATTERN.matcher(fieldStr);

		boolean found = false;
		int last = 0;
		while (m.find(last)) {

			found = true;

			last = m.end();

			String field = m.group(0);

			String functionName = m.group(1);
			Function fun = FunctionNameMap.get(functionName);

			String showField = m.group(3);

			if (null == fun) {
				error("unexisting function anme :" + functionName);
			}

			// Single-parameter functions.
			if (uniParameterFunction.contains(functionName)) {
				UniParameterFunction func = (UniParameterFunction) getFunctionInstance(functionName);

				field = m.group(2);

				// Guards against a forgotten show-field alias.
				if (field.contains(",") || field.contains(")")) {
					error("invalid Uniunction format");
				}
				String tableFieldKey = field.replace("->", ".");

				// Append the function postfix to the row key.
				tableFieldKey = tableFieldKey + ConfConstant.FUNCTOIN_POSTFIX;

				func.setField(tableFieldKey);

				functionalFields.add(field);
				showFieldFunctionMap.put(showField, func);
			}

			// Two-parameter functions.
			if (doubleParameterFunction.contains(functionName)) {
				DoubleParameterFunction func = (DoubleParameterFunction) getFunctionInstance(functionName);

				String[] args = m.group(2).split(",");

				// Validate the argument count before touching args[1]; the
				// check used to come after the access, so a one-argument call
				// failed with ArrayIndexOutOfBoundsException instead.
				if (args.length != 2) {
					error("invalid DoubleParameterFunction format");
				}
				func.setParameter(ParseUtils.parseStrContent(args[1].trim()));

				String tableFieldKey = args[0].replace("->", ".");

				// Append the function postfix to the row key.
				tableFieldKey = tableFieldKey + ConfConstant.FUNCTOIN_POSTFIX;

				func.setField(tableFieldKey);
				// The field must be registered as appearing in the result
				// set, otherwise its column is never loaded.
				functionalFields.add(args[0]);
				showFieldFunctionMap.put(showField, func);

			}

			// Three-parameter function.
			if (functionName.equals(FunctionConstant.REPLACE)) {
				ReplaceFunction rpf = new ReplaceFunction();
				String[] args = m.group(2).split(",");
				if (args.length != 3) {
					error("invalid parameter for replace function");
				}

				String tableFieldKey = args[0].replace("->", ".");

				// Append the function postfix to the row key.
				tableFieldKey = tableFieldKey + ConfConstant.FUNCTOIN_POSTFIX;

				rpf.setField(tableFieldKey);

				String patternStr = args[1].replace("\\\"", "");
				String replaceStr = args[2].replace("\\\"", "");
				rpf.setPatternStr(patternStr);
				rpf.setReplaceStr(replaceStr);

				// NOTE(review): unlike the branches above, this registers the
				// postfixed key rather than the raw argument - confirm.
				functionalFields.add(tableFieldKey);

				showFieldFunctionMap.put(showField, rpf);
			}

			// N-parameter function.
			if (functionName.equals(FunctionConstant.SPRINGTF)) {
				SprintfFunction spf = new SprintfFunction();
				String[] args = m.group(2).split(",");

				spf.setFormat(ParseUtils.parseStrContent(args[0].trim()));
				ArrayList<String> spFields = new ArrayList();
				for (int i = 1; i < args.length; i++) {
					// Shared, per-page and hierarchy tables all use the same
					// "alias.field" key form.
					String tableFieldKey = args[i].replace("->", ".");

					// Append the function postfix to the row key.
					tableFieldKey = tableFieldKey
							+ ConfConstant.FUNCTOIN_POSTFIX;

					spFields.add(tableFieldKey);
					functionalFields.add(args[i]);
				}
				spf.setFields(spFields.toArray(new String[0]));
				showFieldFunctionMap.put(showField, spf);
			}

			// Recursive function (reverse Polish notation).
			if (functionName.equals(FunctionConstant.RECURSIVE)) {
				RecursiveFunction rf = new RecursiveFunction();
				String[] args = m.group(2).split(",");

				rf.setFunctions(ParseUtils.parseStrContent(args[0].trim()));

				List<String> params = new ArrayList();
				for (int i = 1; i < args.length; i++) {
					// Arguments not starting with a quote are field
					// references; quoted ones are literals.

					if (!args[i].trim().startsWith("\"")) {
						String tableFieldKey = args[i].replace("->", ".");

						// Append the function postfix to the row key.
						tableFieldKey = tableFieldKey
								+ ConfConstant.FUNCTOIN_POSTFIX;

						functionalFields.add(tableFieldKey);
						params.add(args[i]);
					} else {
						params.add(args[i]);
					}
				}
				rf.setParams(params.toArray(new String[0]));
				showFieldFunctionMap.put(showField, rf);
			}

		}

		// Remove the function-style items so only plain columns remain.
		if (found) {
			fieldStr = m.replaceAll("");
		}

		ArrayList<Pfk> pfk = new ArrayList<Pfk>();

		ArrayList<Pfk> hierarchyPfk = new ArrayList<Pfk>();

		fields = fieldStr.split(",");

		Set<String> metNames = new HashSet<String>();

		// Function-type fields and plain fields are handled separately;
		// function-type fields have no alias of their own.
		// The field name of a function-type column gets the function postfix
		// so that "cleartag(P.f) f, P.f f2" yields two distinct row keys
		// (fieldMap = {"P.f:FUNCTION" => "f", "P.f" => "f2"}) and the plain
		// column is not overwritten. Two functions overwriting each other's
		// entry is harmless.
		for (String field : functionalFields) {
			field = field.trim();

			// Hierarchy ("->") reference.
			if (field.indexOf("->") > 0) {
				String[] pf = field.trim().split("->");
				if (pf.length == 2) {
					String f = ParseUtils.getRealFieldName(pf[1]);
					// hierarchyPfk.add(new Pfk(pf[0].trim(), f, f));
					hierarchyPfk.add(new Pfk(pf[0].trim(), f
							+ ConfConstant.FUNCTOIN_POSTFIX, f));
				} else {
					error("error prefix pf:" + field);
				}
			} else {

				String[] pf = field.trim().split("\\.");

				if (pf.length == 2) {
					String f = ParseUtils.getRealFieldName(pf[1]);
					// pfk.add(new Pfk(pf[0].trim(), f, f));
					pfk.add(new Pfk(pf[0].trim(), f
							+ ConfConstant.FUNCTOIN_POSTFIX, f));
				} else {
					error("error prefix pf:" + field);
				}
			}
		}

		for (String field : fields) {

			if (StringUtils.isBlank(field))
				continue;

			field = field.trim();

			String[] kv = field.trim().split("\\s+");

			if (kv.length == 2) {
				// Reject a display name that was already used.
				if (!metNames.contains(kv[1].trim())) {
					metNames.add(kv[1].trim());
				} else {
					error("duplicate show field:" + kv[1]);
				}

				// Hierarchy reference, e.g.
				// select P.songName songName,P->downlink downlink;
				if (kv[0].indexOf("->") > 0) {
					String[] pf = kv[0].split("->");
					if (pf.length == 2) {

						hierarchyPfk.add(new Pfk(pf[0].trim(), pf[1].trim(),
								kv[1].trim()));
					} else {
						error("error prefix pf:" + kv[0]);
					}
				} else {

					String[] pf = kv[0].trim().split("\\.");
					if (pf.length == 2) {

						pfk.add(new Pfk(pf[0].trim(), pf[1].trim(), kv[1]
								.trim()));
					} else {
						error("error prefix pf:" + kv[0]);
					}
				}

			} else if (kv.length == 1) { // select P.songName,P->downlink;
				// Hierarchy reference without alias.
				if (kv[0].indexOf("->") > 0) {
					String[] pf = kv[0].trim().split("->");

					if (pf.length == 2) {
						String f = ParseUtils.getRealFieldName(pf[1]);
						// Reject a display name that was already used.
						if (!metNames.contains(f)) {
							metNames.add(f);
						} else {
							error("duplicate show field:" + f);
						}

						metNames.add(pf[1]);
						hierarchyPfk.add(new Pfk(pf[0].trim(), f, f));

					} else {
						error("error prefix pf:" + kv[0]);
					}

				} else {

					String[] pf = kv[0].trim().split("\\.");

					if (pf.length == 2) {

						String f = ParseUtils.getRealFieldName(pf[1]);

						if (!metNames.contains(f)) {
							metNames.add(f);
						} else {
							error("duplicate show field:" + f);
						}
						pfk.add(new Pfk(pf[0].trim(), f, pf[1].trim()));
					} else {
						error("error prefix pf:" + kv[0]);
					}
				}
			} else {
				StringBuffer fieldsError = new StringBuffer();
				for (String f : kv) {
					fieldsError.append(f);
				}
				error("error fields description:" + fieldsError);
			}
		}// end foreach fields

		w = postfix.indexOf("where");
		if (w != -1) {
			fieldStr = postfix.substring(w + "where".length()).trim();
		} else {
			fieldStr = "";
		}

		ArrayList<Pkpk> pkpk = new ArrayList();

		fields = new String[0];

		// Where conditions. Split on the keyword "and" surrounded by
		// whitespace; splitting on the bare substring cut identifiers such as
		// "brand" or "command" in two.
		if (StringUtils.isNotEmpty(fieldStr)) {
			fields = fieldStr.split("\\s+and\\s+");
		}

		for (String field : fields) {

			field = field.trim();
			String optag = "=";
			Operator operator = null;

			// Pick the first registered operator token found in the text.
			for (String op : operators.keySet()) {
				if (field.contains(op)) {
					optag = op;
					operator = operators.get(op);
					break;
				}
			}

			// P1.F1=P2.F2, P1.F1=NULL
			String[] kv = field.split(optag);
			if (kv.length == 2) {// only the forms above are supported

				String tableKey = kv[0].trim().replace("->", ".");

				String[] pf = tableKey.split("\\.");

				String p1 = null, f1 = null, p2 = null, f2 = null;
				if (pf.length == 2) {
					p1 = pf[0].trim();
					f1 = pf[1].trim();
				} else {
					error("error post pf");
				}

				List args = new CopyOnWriteArrayList();

				String targetTableKey = kv[1].trim();
				// Null test: single operand.
				if (targetTableKey.equals(ConfConstant.NULL)) {

					args.addAll(Arrays.asList(new String[] { p1, f1 }));
					Pkpk cmpNullOP = new Pkpk(ArgumentType.ONE, operator, args);
					pkpk.add(cmpNullOP);
				} else if (targetTableKey.startsWith("\"")
						&& targetTableKey.endsWith("\"")) {
					String str = targetTableKey.substring(1, targetTableKey
							.length() - 1);

					// Comparison with a string literal: equal or not equal.
					if (operator instanceof UneqOperator) {
						operator = new UneqStringOperator(str);
					} else {
						operator = new EqStringOperator(str);
					}
					args.addAll(Arrays.asList(new String[] { p1, f1, str }));
					Pkpk cmpStrOP = new Pkpk(ArgumentType.ONE, operator, args);
					pkpk.add(cmpStrOP);

				} else {

					// Column-to-column comparison: two operands.
					pf = kv[1].trim().split("\\.");
					if (pf.length == 2) {
						p2 = pf[0].trim();
						f2 = pf[1].trim();
						args.addAll(Arrays
								.asList(new String[] { p1, f1, p2, f2 }));
					} else {
						error("error post pf");
					}
					Pkpk cmpOP = new Pkpk(ArgumentType.TWO, operator, args);
					pkpk.add(cmpOP);
				}

			} else {
				error("error fields description");
			}
		}

		return getFinalResult(pageNameMap, pfk, hierarchyPfk, pkpk);

	}

	/**
	 * Builds a small three-page sample data set. The query that would run
	 * against it is commented out, so the method currently has no output.
	 * 
	 * @param argv
	 *            unused
	 */
	public static void main(String[] argv) {

		// Source data: page name -> rows.
		Map<String, List<Map<String, String>>> pageMap = new HashMap<String, List<Map<String, String>>>();

		// Data of P1.
		List<Map<String, String>> firstPage = new ArrayList<Map<String, String>>();
		firstPage.add(sampleRow("bcid", "v_bcid", "bcname", "v_bcname"));
		firstPage.add(sampleRow("bcid", "v_bcid1", "bcname", "v_bcname1"));
		pageMap.put("pagename1", firstPage);

		// Data of P2.
		List<Map<String, String>> secondPage = new ArrayList<Map<String, String>>();
		secondPage.add(sampleRow("bcid", "v_bcid", "scid", "v_sci", "scname",
				"v_scname"));
		secondPage.add(sampleRow("bcid", "v_bcid1", "scid", "v_sci1",
				"scname", "v_scname1"));
		secondPage.add(sampleRow("bcid", "v_bcid1", "scid", "v_sci2",
				"scname", "v_scname2"));
		pageMap.put("pagename2", secondPage);

		// Data of P3.
		List<Map<String, String>> thirdPage = new ArrayList<Map<String, String>>();
		thirdPage.add(sampleRow("scid", "v_sci", "scname", "v_scname",
				"songname", "v_songname1", "downlink", "v_downlink1"));
		thirdPage.add(sampleRow("scid", "v_sci", "scname", "v_scname",
				"songname", "v_songname2", "downlink", "v_downlink2"));
		thirdPage.add(sampleRow("scid", "v_sci1", "scname", "v_scname",
				"songname", "v_songname3", "downlink", "v_downlink3"));
		thirdPage.add(sampleRow("scid", "v_sci1", "scname", "v_scname",
				"songname", "v_songname4", "downlink", "v_downlink4"));
		pageMap.put("pagename3", thirdPage);

		Map<String, Map<String, String>> hierarchy = new HashMap<String, Map<String, String>>();

		// Map<String, List<Map<String, String>>> curUniResultTableMap =new
		// ConcurrentHashMap();
		// ResutTree resutTree = new ResutTree(pageMap,
		// hierarchy,curUniResultTableMap);
		// resutTree.setPageMap(pageMap);
		//
		// String str = "pagename1:P1,pagename2:P2,pagename3:P3;select P1.bcname
		// bcname,P2.scname sncame,P3.songname songname,P3.downlink downlink"
		// + " downlink where P1.bcid=P2.bcid and P2.scid=P3.scid; dao->insert
		// ";
		//
		// int semi = str.lastIndexOf(";");
		//
		// String dao = str.substring(semi + 1);
		// String irql = str.substring(0, semi);
		//
		// ArrayList<Map> am = resutTree.getMapResult(irql);
	}

	/** Builds one sample row from alternating column names and values. */
	private static Map<String, String> sampleRow(String... pairs) {
		Map<String, String> row = new HashMap<String, String>();
		for (int i = 0; i + 1 < pairs.length; i += 2) {
			row.put(pairs[i], pairs[i + 1]);
		}
		return row;
	}

	/**
	 * Replaces the merge definitions consulted by getFinalResult.
	 * 
	 * @param mergeMap
	 *            merge definitions keyed by target table name; stored as-is,
	 *            not copied
	 */
	public void setMergeMap(Map<String, TableMerge> mergeMap) {
		this.mergeMap = mergeMap;
	}

}
分享到：