trie树--AC自动机

itace

浏览: 187621 次
性别:
来自: 北京

最近访客更多访客>>

u012363178

wangyy

tianshiguishu

Sharpleo

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

java
算法

package com.chipmunk.algorithm.trie;


public class Branch {
	
	
	private char word;
	private byte status = 0;//0词语未结束1词语结束
	private Branch[] branches = null;
	
	
	public Branch(char word) {
		super();
		this.word = word;
	}

	public Branch(char word, byte status) {
		super();
		this.word = word;
		this.status = status;
	}

	/**
	 * 
	 * @param b
	 * @return
	 */
	public Branch add(Branch b){
		if (branches==null) {
			branches=new Branch[1];
			branches[0] = b;
			return branches[0];
		}else {
			char w1=b.getWord();
			//判断是否已经有相等的字
			int i = -1;
			for (int j = 0; j < branches.length; j++) {
				Branch bb =branches[j]; 
				char w2 = bb.getWord();
				if (w1==w2) {
					i=j;
					//如果原来这个字不是结束符，更改为结束符
					if (b.getStatus()==1&&bb.getStatus()==0) {
						bb.setStatus(b.getStatus());
					}
					break;
				}
			}
			if (i>-1) {//字已经添加过了
				return branches[i];
			}else {//添加一个字
				Branch[]branches2=new Branch[branches.length+1];
				System.arraycopy(branches,0,branches2,0,branches.length);
				branches2[branches2.length-1]=b;
				branches=branches2;
				return branches[branches.length-1];
			}
		}
		
	}
	
	public char getWord() {
		return word;
	}

	public void setWord(char word) {
		this.word = word;
	}

	public byte getStatus() {
		return status;
	}

	public void setStatus(byte status) {
		this.status = status;
	}

	public Branch[] getBranches() {
		return branches;
	}

	public void setBranches(Branch[] branches) {
		this.branches = branches;
	}

}

package com.chipmunk.algorithm.trie;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;


/**
 * Static helpers for building a character trie out of {@link Branch} nodes
 * and for looking up dictionary words in a text.
 */
public class TrieTree {

	/**
	 * Returns the child of {@code node} that carries character {@code c}.
	 *
	 * @param node node whose children are searched
	 * @param c    character to look for
	 * @return the matching child, or {@code null} if there is none
	 */
	private static Branch findChild(Branch node, char c) {
		Branch[] children = node.getBranches();
		if (children == null) {
			return null;
		}
		for (Branch child : children) {
			if (child.getWord() == c) {
				return child;
			}
		}
		return null;
	}

	/**
	 * Adds a new word to the trie.
	 *
	 * @param root root of the trie
	 * @param text word to insert; an empty string inserts nothing
	 */
	private static void addWords(Branch root, String text) {
		Branch current = root;
		int lastIndex = text.length() - 1;
		for (int i = 0; i <= lastIndex; i++) {
			// Only the final character of the word is flagged as a word end.
			byte status = (i == lastIndex) ? (byte) 1 : (byte) 0;
			current = current.add(new Branch(text.charAt(i), status));
		}
	}

	/**
	 * Looks up one word (exact match).
	 *
	 * @param root root of the trie
	 * @param text word to look up
	 * @return {@code true} if every character of {@code text} can be followed
	 *         from the root and the last node is flagged as a word end;
	 *         {@code false} otherwise, including for the empty string
	 */
	private static boolean searchTerm(Branch root, String text) {
		// An empty word can never be stored, so it can never be found.
		if (text.isEmpty()) {
			return false;
		}
		Branch node = root;
		for (int i = 0; i < text.length(); i++) {
			node = findChild(node, text.charAt(i));
			if (node == null) {
				return false;
			}
		}
		return node.getStatus() != 0;
	}

	/**
	 * Looks up words along the first matching run of the text (partial match).
	 *
	 * <p>Leading characters that do not start any dictionary word are skipped.
	 * From the first character that does, the trie is followed until a
	 * character no longer matches; every word end passed on the way is
	 * reported. The search stops at that first mismatch.
	 *
	 * @param root root of the trie
	 * @param text text to scan
	 * @return the words found, shortest first; empty if none
	 */
	public static List<String> searchTermPart(Branch root, String text) {
		List<String> found = new ArrayList<String>();
		StringBuilder matched = new StringBuilder();
		Branch node = root;
		for (int i = 0; i < text.length(); i++) {
			char c = text.charAt(i);
			Branch next = findChild(node, c);
			if (next == null) {
				if (matched.length() > 0) {
					// The run that was being followed has ended.
					break;
				}
				// No run has started yet: keep looking from the root.
				continue;
			}
			node = next;
			matched.append(c);
			if (node.getStatus() == 1) {
				found.add(matched.toString());
			}
		}
		return found;
	}

	/**
	 * Finds every dictionary word occurring anywhere in the text, including
	 * overlapping occurrences.
	 *
	 * <p>The trie is walked from the root once for each start offset, so a
	 * word is reported even when it begins inside a longer match that runs
	 * up to the end of the text.
	 *
	 * @param root root of the trie
	 * @param text text to scan
	 * @return the words found, ordered by start offset and then by length;
	 *         empty if none
	 */
	public static List<String> searchAllTerms(Branch root, String text) {
		List<String> found = new ArrayList<String>();
		int length = text.length();
		for (int start = 0; start < length; start++) {
			Branch node = root;
			for (int end = start; end < length; end++) {
				node = findChild(node, text.charAt(end));
				if (node == null) {
					break;
				}
				if (node.getStatus() == 1) {
					found.add(text.substring(start, end + 1));
				}
			}
		}
		return found;
	}

	/**
	 * Plants the tree: builds a trie containing all given words.
	 *
	 * @param words words to insert
	 * @return root of the new trie; the root itself carries a blank character
	 */
	public static Branch plantTree(Collection<String> words) {
		Branch root = new Branch(' ');
		for (String w : words) {
			addWords(root, w);
		}
		return root;
	}

	/**
	 * Prints the characters of all nodes below {@code tree} depth-first,
	 * emitting a line break after every node flagged as a word end.
	 *
	 * @param tree node whose descendants are printed
	 */
	public static void print(Branch tree) {
		Branch[] children = tree.getBranches();
		if (children == null) {
			return;
		}
		for (Branch child : children) {
			System.out.print(child.getWord());
			if (child.getStatus() == 1) {
				System.out.println();
			}
			print(child);
		}
	}

	/**
	 * Small demo: builds a trie from a fixed word list and prints the result
	 * of an exact lookup and of a full-text scan.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		List<String> words = new ArrayList<String>();
		words.add("中国人");
		words.add("中华民族");
		words.add("中华");
		words.add("大中华区");
		words.add("中华人民共和国");
		words.add("中华民国");
		words.add("华人");
		words.add("国歌");
		Branch tree = plantTree(words);

		System.out.println(searchTerm(tree, "中华人民共和国"));
		System.out.println(searchAllTerms(tree, "唱中华人民共和国国歌是"));

	}
}

分享到：

欧拉七桥问题 | Hashmap深入解析【转】

2016-09-22 16:49
浏览 494
评论(0)
分类:开源软件
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

trie树--AC自动机

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

trie树--AC自动机

评论

发表评论

相关推荐

jdk-linux环境变量

BufferedImage由Gray转成RGB，颜色不变

jni和C++二维数组

对象转换成真实对象泛型

获取连通区域

更改html项目中的中文图片中文文件等中文路径

session唯一登录用户

maven配置

POI3.14 Provider com.bea.xml.stream.EventFactory not found

java多个安装版本切换

java

Json格式化

apache开源软件版本号

图像边缘检测算子

数据挖掘算法对比

期望值，方差，标准差，协方差，相关系数

基本函数求导

正则表达式贪婪与懒惰模式

欧拉七桥问题

Hashmap深入解析【转】

最近访客更多访客>>