论坛首页 入门技术论坛

Java 之正则表达式

浏览 4383 次
该帖已经被评为新手帖
作者 正文
   发表时间:2010-02-26   最后修改:2010-02-26
package example.regularexpressions;

import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import junit.framework.TestCase;

/**
 * Walk-through of the core {@code java.util.regex} API ({@link Pattern},
 * {@link Matcher}, {@link MatchResult}) written as JUnit 3 test methods.
 *
 * <p>Every method prints its results to standard output. Only {@link #test6()}
 * has a name starting with {@code test}; the other demos carry a leading
 * underscore, which keeps JUnit 3's name-based test discovery from running
 * them. Remove the underscore from a method to run that demo.
 */
public class Basics extends TestCase {

	/**
	 * {@link Pattern#split(CharSequence)}: cut a string around every match.
	 *
	 * <p>{@code [,\s]+} matches a run of one or more commas and/or whitespace
	 * characters, so the words come out without any separators attached.
	 *
	 * @throws Exception not expected; declared to keep the test signatures uniform
	 */
	public void _test1() throws Exception {
		Pattern separators = Pattern.compile("[,\\s]+");
		String[] words = separators.split("one two  three,four , five,six");
		for (String word : words) {
			System.out.println(word);
		}
	}

	/**
	 * {@link Matcher#matches()} versus {@link Matcher#lookingAt()}.
	 *
	 * <p>A {@code Matcher} is obtained from {@link Pattern#matcher(CharSequence)};
	 * the input must implement {@code CharSequence}.
	 * <ul>
	 * <li>{@code matches()} is true only when the whole input matches.</li>
	 * <li>{@code lookingAt()} is true when a match starts at the beginning of
	 * the input, even if the input continues afterwards.</li>
	 * </ul>
	 *
	 * @throws Exception not expected; declared to keep the test signatures uniform
	 */
	public void _test2() throws Exception {
		Pattern hello = Pattern.compile("hello");

		// Whole input equals the pattern.
		Matcher matcher1 = hello.matcher("hello");
		System.out.println("matcher1.matches()=>" + matcher1.matches());

		// Input only starts with the pattern: matches() fails, lookingAt() succeeds.
		Matcher matcher2 = hello.matcher("hello world");
		System.out.println("matcher2.matches()=>" + matcher2.matches());
		System.out.println("matcher2.lookingAt()=>" + matcher2.lookingAt());
	}

	/**
	 * {@link Matcher#find()} scans for the next matching subsequence; combined
	 * with {@code appendReplacement}/{@code appendTail} it rebuilds the input
	 * with every match replaced.
	 *
	 * @throws Exception not expected; declared to keep the test signatures uniform
	 */
	public void _test3() throws Exception {
		Pattern pattern = Pattern.compile("hello");
		Matcher matcher = pattern.matcher("hello world, hello world, hello_world");
		StringBuffer sb = new StringBuffer();
		while (matcher.find()) {
			// Copies the text between the previous match and this one, then the replacement.
			matcher.appendReplacement(sb, "haha");
			System.out.println("sb=>" + sb);
		}
		// Copies whatever follows the last match.
		matcher.appendTail(sb);
		System.out.println(sb.toString());
	}

	/**
	 * Extracts the IP address and the bracketed timestamp from log entries.
	 *
	 * <p>The dots of the IP address are escaped ({@code \.}) because they are
	 * meant literally here, not as the "any character" metacharacter. The
	 * timestamp is everything between {@code [} and the next {@code ]}:
	 * {@code \[([^\]]+)\]}.
	 *
	 * @throws Exception not expected; declared to keep the test signatures uniform
	 */
	public void _test4() throws Exception {
		String logEntry = "192.168.0.1 - - [26/Feb/2009:14:56:43 -0500]\"GET /lsAlive/ht HTTP/1.0\"200 15\r\n"
			+ "192.168.0.2 - - [25/Feb/2009:14:56:43 -0500]\"GET /lsAlive/ht HTTP/1.0\"200 15";
		String regexp = "([0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3})\\s-\\s-\\s\\[([^\\]]+)\\]";
		Matcher matcher = Pattern.compile(regexp).matcher(logEntry);
		while (matcher.find()) {
			// toMatchResult() takes a snapshot of the current match, unaffected by later find() calls.
			MatchResult result = matcher.toMatchResult();
			System.out.println("IP=>" + result.group(1));
			System.out.println("Timestamp=>" + result.group(2));
		}
	}

	/**
	 * HTML handling: lists every attribute of a {@code FONT} tag.
	 *
	 * <p>Works in two steps: the first pattern captures the raw attribute
	 * string of the tag, the second one walks over its {@code name="value"}
	 * pairs.
	 *
	 * @throws Exception not expected; declared to keep the test signatures uniform
	 */
	public void _test5() throws Exception {
		String html = "<font face=\"Arial Serif\" size=\"+2\" color=\"red\">";
		String regexForTag = "<\\s*font\\s+([^>]*)\\s*>";

		Matcher tagMatcher = Pattern.compile(regexForTag, Pattern.CASE_INSENSITIVE).matcher(html);
		// group() is only legal after a successful match; fail clearly instead of
		// letting group(1) throw IllegalStateException.
		assertTrue("no <font ...> tag found in: " + html, tagMatcher.find());

		String attribute = tagMatcher.group(1);
		System.out.println("属性字符串为:" + attribute);

		String regexForAttribute = "([a-z]+)\\s*=\\s*\"([^\"]+)\"";
		Matcher attributeMatcher =
			Pattern.compile(regexForAttribute, Pattern.CASE_INSENSITIVE).matcher(attribute);
		while (attributeMatcher.find()) {
			System.out.println(attributeMatcher.group(1) + "=" + attributeMatcher.group(2));
		}
	}

	/**
	 * HTML handling: finds anchor tags pointing at a given URL prefix and
	 * prints each tag before and after swapping the host for {@code localhost}.
	 *
	 * <p>The URL prefix is wrapped in {@link Pattern#quote(String)} so that its
	 * dots match literal dots only.
	 *
	 * @throws Exception not expected; declared to keep the test signatures uniform
	 */
	public void test6() throws Exception {
		String url = "<a href=\"http://192.168.0.1:8080/test/index.jsp#test...\">"
			+ "< a href = \"http://192.168.0.1:8080/test/index.jsp#?hahahaha...\">";
		String regex = "(<\\s*a\\s+href\\s*=\\s*\""
			+ Pattern.quote("http://192.168.0.1:8080/test/index.jsp")
			+ "[^\"]+\">)";
		Matcher matcher = Pattern.compile(regex).matcher(url);
		boolean found = matcher.find();
		System.out.println("find=>" + found);
		while (found) {
			String tag = matcher.group(1);
			System.out.println("替换前=>" + tag);
			// String.replace is a literal (non-regex) replacement.
			tag = tag.replace("192.168.0.1", "localhost");
			System.out.println("替换后=>" + tag);
			found = matcher.find();
		}
	}

	/**
	 * Common task 1 of 4 - search.
	 *
	 * <p>{@code rs} is true when the input contains a match for the regex and
	 * false otherwise. For a case-insensitive search compile the pattern with
	 * {@code Pattern.compile(regEx, Pattern.CASE_INSENSITIVE)}.
	 *
	 * @throws Exception not expected; declared to keep the test signatures uniform
	 */
	public void _testQuery() throws Exception {
		String str = "abc efg ABC";
		String regEx = "a|f";
		Matcher matcher = Pattern.compile(regEx).matcher(str);
		boolean rs = matcher.find();
		System.out.println("rs=>" + rs);
	}

	/**
	 * Common task 2 of 4 - extraction.
	 *
	 * <p>Prints {@code name.txt}. Captured text is available through
	 * {@code matcher.group(i)} for {@code i} from 1 to
	 * {@code matcher.groupCount()}.
	 *
	 * @throws Exception not expected; declared to keep the test signatures uniform
	 */
	public void _testGet() throws Exception {
		String regEx = ".+\\\\(.+)$";
		String str = "c:\\dir1\\dir2\\name.txt";
		Matcher matcher = Pattern.compile(regEx).matcher(str);
		// groupCount() depends on the pattern only, so without this check the loop
		// below would call group(i) after a failed find() and throw.
		assertTrue("pattern " + regEx + " did not match: " + str, matcher.find());
		for (int i = 1; i <= matcher.groupCount(); i++) {
			System.out.println(matcher.group(i));
		}
	}

	/**
	 * Common task 3 of 4 - splitting, once through {@link Pattern#split(CharSequence)}
	 * and once through {@link String#split(String)}.
	 *
	 * @throws Exception not expected; declared to keep the test signatures uniform
	 */
	public void _testSplit() throws Exception {
		String regex = "::";
		Pattern pattern = Pattern.compile(regex);
		String[] result = pattern.split("aa::bb::cc");
		for (String str : result) {
			System.out.println("result=>" + str);
		}

		System.out.println("---------");
		String[] normal = "aa::bb::cc".split(regex);
		for (String str : normal) {
			System.out.println("normal=>" + str);
		}
	}

	/**
	 * Common task 4 of 4 - replacement (and deletion).
	 *
	 * <p>Replacing with the empty string deletes every match.
	 * {@code replaceFirst} and {@code replaceAll} reset the matcher before
	 * they run, so one matcher can serve all three calls.
	 *
	 * @throws Exception not expected; declared to keep the test signatures uniform
	 */
	public void _testReplaceOrDelete() throws Exception {
		String regex = "a+";
		Matcher matcher = Pattern.compile(regex).matcher("aaabbced a ccdeaa");
		System.out.println("replaceFirst=>" + matcher.replaceFirst("A"));
		String result = matcher.replaceAll("A");
		System.out.println("replaceAll=>" + result);
		String delete = matcher.replaceAll("");
		// Show the deletion result instead of silently discarding it.
		System.out.println("delete=>" + delete);
		System.out.println("替换为空即可达到删除的功能");
	}

	/**
	 * No fixture of its own; delegates to the superclass.
	 */
	@Override
	protected void setUp() throws Exception {
		super.setUp();
	}

	/**
	 * No fixture of its own; delegates to the superclass.
	 */
	@Override
	protected void tearDown() throws Exception {
		super.tearDown();
	}

}

 

   发表时间:2010-02-26  
这是基础帖了。。。。
0 请登录后投票
   发表时间:2010-02-26  
是的,哈哈。以前看过一点,但要用的时候发现又忘了,只好再看一遍,花了半天时间整理一下,做个备份。
0 请登录后投票
论坛首页 入门技术版

跳转论坛:
Global site tag (gtag.js) - Google Analytics