一、题目如下:
--------------------------
Write a parser for a simplified regular expression
On an alphabet set [a-z], a simplified regular expression is much simpler than the normal regular expression.
It has only two meta characters: '.' and '*'.
'.' -- exact one arbitrary character match.
'*' -- zero or more arbitrary character match.
--------------------------
具个例子,表达式 ab.*d 能匹配 'abcd', 'abccd', 'abad'。 不能匹配'abc', ' '(空字符串), 'abd'。
二、解法:
解析给定的表达式
生成对应的 NFA(Nondeterministic Finite Automation)
NFA转化为DFA(Deterministic Finite Automation)
利用DFA判断输入字符串
不懂正则表达式? 参考http://deerchao.net/tutorials/regex/regex.htm
不懂NFA? 参考http://planning.cs.uiuc.edu/node558.html
不懂DFA?参考http://lambda.uta.edu/cse5317/notes/node8.html
三、相关代码:
FiniteAutomata.java 和State.java,随手写写,请选择性参考。
FiniteAutomata.java
package parser;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
public class FiniteAutomata {
private State start;
private final static char[] inputs;
static {
inputs = new char[26];
for (char i = 0; i < 26; i++) {
inputs[i] = (char) ('a' + i);
}
}
private final char episilon = 0;
private char stateId;
public void compile(String pattern) {
this.stateId = 'A';
State NFA = toNFA(pattern);
this.start = convertToDFA(NFA);
}
private State convertToDFA(State NFA) {
Map<Set<State>, State> cache = new HashMap<Set<State>, State>();
Queue<Set<State>> queue = new LinkedList<Set<State>>();
// start state of DFA
Set<State> start = episilon(NFA);
State starter = nextState();
cache.put(start, starter);
queue.offer(start);
while (!queue.isEmpty()) {
Set<State> currentEpisilon = queue.poll();
State current = cache.get(currentEpisilon);
// create new State
for (char input : inputs) {
Set<State> nextEpisilon = move(currentEpisilon, input);
if (nextEpisilon.isEmpty()) {
continue;
}
State next;
if (!cache.containsKey(nextEpisilon)) {
next = nextState();
cache.put(nextEpisilon, next);
queue.offer(nextEpisilon);
} else {
next = cache.get(nextEpisilon);
}
boolean isAccept = containsAcceptState(nextEpisilon);
next.setAccept(isAccept);
current.setTransition(input, next);
}
}
return starter;
}
private Set<State> move(Set<State> currentEpisilon, char input) {
Set<State> result = new HashSet<State>();
for (State s : currentEpisilon) {
State next = s.transit(input);
if (next != null) {
result.add(next);
}
}
return episilon(result);
}
private boolean containsAcceptState(Set<State> nextEpisilon) {
for (State state : nextEpisilon) {
if (state.isAccept()) {
return true;
}
}
return false;
}
private Set<State> episilon(State s) {
Set<State> cache = new HashSet<State>();
cache.add(s);
return episilon(s, cache);
}
private Set<State> episilon(Set<State> states) {
Set<State> cache = new HashSet<State>();
for (State s : states) {
cache.add(s);
cache.addAll(episilon(s, cache));
}
return cache;
}
private Set<State> episilon(State s, Set<State> cache) {
State next = s.transit(episilon);
if (next != null && !cache.contains(next)) {
cache.add(next);
cache.addAll(episilon(s, cache));
}
return cache;
}
private State toNFA(String pattern) {
char[] expr = pattern.toCharArray();
State currentState = nextState();
State starter = currentState;
for (char c : expr) {
if (c == '.') {
State nextState = nextState();
// add each transition for all inputs
for (char i : inputs) {
currentState.setTransition(i, nextState);
}
currentState = nextState;
} else if (c == '*') {
State nextState = nextState();
// add each transition for all inputs
for (char i : inputs) {
currentState.setTransition(i, nextState);
}
currentState.setTransition(episilon, nextState);
nextState.setTransition(episilon, currentState);
currentState = nextState;
} else if (c >= 'a' && c <= 'z') {
State nextState = nextState();
currentState.setTransition(c, nextState);
currentState = nextState;
} else {
throw new RuntimeException("Unrecognized expression");
}
}
currentState.setAccept(true);
return starter;
}
private State nextState() {
return new State(stateId++);
}
public void constructDFA(Map<State, Map<Character, State>> mapping) {
Iterator<Map.Entry<State, Map<Character, State>>> it = mapping
.entrySet().iterator();
while (it.hasNext()) {
Entry<State, Map<Character, State>> entry = it.next();
State state = entry.getKey();
Map<Character, State> transition = entry.getValue();
state.setTransition(transition);
}
}
public boolean match(String c) {
char[] candidate = c.toCharArray();
for (char d : candidate) {
start = start.transit(d);
if (start == null) {
return false;
}
}
return start.isAccept();
}
public static String[] patterns = { "abc", "*", "*abc", "*abc", "a*bc",
"a*bc", "a*", "a*", "a*", "a*", "*abc*", "*****", "...", ".*",
".bc*", ".b*c*a", "*", "abc", "*a", "a", ".a*c", "a.*b", "..", "",
"" };
public static String[] candidates = { "abc", "abc", "abc", "aaabbbabc",
"aaabbbabc", "abc", "abc", "a", "aa", "abcdef", "abc", "abc",
"abc", "abc", "abc", "abca", "", "abcd", "abcd", "", "abc", "abc",
"abc", "", "abc" };
public static void main(String[] args) {
FiniteAutomata fa = new FiniteAutomata();
for (int i = 0; i < patterns.length; i++) {
fa.compile(patterns[i]);
System.out.println(fa.match(candidates[i]));
}
}
}
State.java
package parser;
import java.util.HashMap;
import java.util.Map;
public class State {
private boolean accept = false;
private char id;
private Map<Character, State> mapping = new HashMap<Character, State>();
public State(char c) {
this.id = c;
}
public State(char c, boolean isAccept) {
this.id = c;
this.accept = isAccept;
}
public State transit(char input) {
return mapping.get(input);
}
public void setTransition(char input, State next) {
this.mapping.put(input, next);
}
public void setTransition(Map<Character, State> mapping) {
this.mapping = mapping;
}
public boolean isAccept() {
return this.accept;
}
public void setAccept(boolean b) {
this.accept = b;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + id;
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
State other = (State) obj;
if (id != other.id)
return false;
return true;
}
@Override
public String toString() {
return "State [id=" + id + "]";
}
}
分享到:
相关推荐
Xpdf是一款开源的PDF文档阅读和处理工具,专门针对中文环境进行了优化,名为"xpdf-chinese-simplified",其压缩包文件"xpdf-chinese-simplified.zip"包含了适用于简体中文用户的所有组件。这款软件在IT领域中具有...
《xpdf-chinese-simplified:增强SWFTools的中文支持》 在IT行业中,处理文本和图形文件格式是一项常见的任务,而SWFTools是一个国外开发的开源软件工具集,主要用于处理Adobe Flash(SWF)文件。然而,由于其最初...
标题中的"xpdf-chinese-simplified.rar"表明这是一个与处理中文PDF文档相关的压缩包,其中包含了XPDF工具的简体中文版本。XPDF是一款开源的PDF文档处理工具集,主要用于PDF文档的查看、转换和提取信息,尤其在处理非...
《xpdf-chinese-simplified:中文简体版PDF阅读器组件详解》 在信息技术领域,PDF(Portable Document Format)格式因其跨平台、保留原始布局和样式的特点,被广泛用于文档交换和阅读。为了方便中文用户阅读PDF文档...
cidToUnicode Adobe-GB1 c:\xpdf\xpdf-chinese-simplified\Adobe-GB1.cidToUnicode unicodeMap ISO-2022-CN c:\xpdf\xpdf-chinese-simplified\ISO-2022-CN.unicodeMap unicodeMap EUC-...
Microsoft SQL Server 2000 Developer Edition (Chinese-Simplified) ,可做收藏、学习、研究。
pcasm-book-simplified-chinese.pdf是一个翻译成中文的汇编语言教程,不是详细的介绍汇编语言,只有一些最基本的方面的介绍。不过这本书是叙述在80386和后来的处理器如何在保护模式下编程。用NASM和DJGPP C/C++...
CMMI-DEV-v1.3-Simplified-Chinese-FINAL
SWFTools中文字库:xpdf-chinese-simplified 1.里面包含了 gkai00mp.ttf,无需另外下载 2.下载解压后修改xpdf-chinese-simplified目录下的add-to-xpdfrc文件中的 所有路径,(当前路径) 3.ok 了
标题“sw2007chinese-simplified”指的是SolidWorks 2007的简体中文版软件包。SolidWorks是一款流行的三维机械设计软件,广泛应用于工程和制造业,用于创建、模拟、发布和管理产品的3D模型。这个特定的版本是针对...
这篇《MyBatis-3-User-Guide-Simplified-Chinese》是针对中国开发者的一份详细指南,提供了全面的MyBatis3使用方法和最佳实践。 首先,MyBatis的核心概念是Mapper接口和XML映射文件。Mapper接口定义了数据库操作的...
文件放服务器下载,请务必到电脑端资源详情查看然后下载,google-noto-sans-simplified-chinese-fonts-20141117-5.el7.noarch.rpm
Windows Server 2008 R2 HPC Edition (x64) - DVD (Chinese-Simplified)下载地址,赚点积分,感谢感谢!赚点积分,感谢感谢!赚点积分,感谢感谢!赚点积分,感谢感谢!赚点积分,感谢感谢!赚点积分,感谢感谢!
ArtiosCAD是一款由 Esko 公司开发的专业包装设计软件,广泛应用于印刷、包装和广告设计行业。ArtiosCAD 14 是该系列的第14个主要版本,专为简体中文用户提供了本地化界面,使得中国用户能更加便捷地进行包装结构设计...
Publisher: Prentice Hall PTR Pub Date: September 15, 2003 ISBN: 0-13-066946-6 Pages: 608 Section 2.7....Section 2.8....Section 2.9....Section 2.10....Section 2.11....Section 2.12....Section 2.13....Section 2.14....