浏览 2595 次
锁定老帖子 主题:通配符识别的java实现
精华帖 (0) :: 良好帖 (0) :: 新手帖 (0) :: 隐藏帖 (0)
|
|
---|---|
作者 | 正文 |
发表时间:2009-07-10
前两天在网上无意看到一个关于使用java的正则表达式来进行通配符识别判断的实现。(http://blog.csdn.net/subchen/archive/2007/10/25/1843232.aspx) 看了一下,程序其实有蛮多问题的,甚至都无法通过编译以及测试。我将程序改了一下,并稍微优化了一下结构,如下:
private static final String PATTERN_LINE_START = "^" ; private static final String PATTERN_LINE_END = "$" ; private static final char[] META_CHARACTERS = { '$', '^', '[', ']', '(', ')', '{', '}', '|', '+', '.', '\\' }; /** * match function, support '*' and '?'. * The function is based on regex. * @param pattern * @param str * @return */ public static boolean wildcardMatch(String pattern, String str) { pattern = convertToRegexPattern(pattern); return Pattern.matches(pattern, str); } private static String convertToRegexPattern(String wildcardString) { String result = PATTERN_LINE_START ; char[] chars = wildcardString.toCharArray() ; for (char ch : chars) { if (Arrays.binarySearch(META_CHARACTERS, ch)>=0) { result += "\\" + ch ; continue ; } switch (ch) { case '*': result += ".*"; break; case '?': result += ".{0,1}"; break; default: result += ch; } } result += PATTERN_LINE_END ; return result; }
后来运行了一下,发现这样的效率并不高,原代码中不支持?,而实际中对于?的通配符匹配,尤其是在java类名,文件名的识别上应用并不多,于是,另外写了个快速的实现,不使用正则表达式。 public static boolean simpleWildcardMatch(String pattern, String str) { return wildcardMatch(pattern, str, "*"); } public static boolean wildcardMatch(String pattern, String str, String wildcard) { if (StringUtils.isEmpty(pattern) || StringUtils.isEmpty(str)) { return false; } final boolean startWith = pattern.startsWith(wildcard); final boolean endWith = pattern.endsWith(wildcard); String[] array = StringUtils.split(pattern, wildcard); int currentIndex = -1; int lastIndex = -1 ; switch (array.length) { case 0: return true ; case 1: currentIndex = str.indexOf(array[0]); if (startWith && endWith) { return currentIndex >= 0 ; } if (startWith) { return currentIndex + array[0].length() == str.length(); } if (endWith) { return currentIndex == 0 ; } return str.equals(pattern) ; default: for (String part : array) { currentIndex = str.indexOf(part); if (currentIndex > lastIndex) { lastIndex = currentIndex; continue; } return false; } return true; } } 为了验证这两个方法,测试类借用了一部分原文的测试例子: public class RegexUtilsTest { /** * test '?' and '*' */ @Test public void testWildMatch2() { assertTrue(RegexUtils.wildcardMatch("1234?", "12345")); assertTrue(RegexUtils.wildcardMatch("1234?", "1234")); assertFalse(RegexUtils.wildcardMatch("1234?", "123456")); assertTrue(RegexUtils.wildcardMatch("abc*x?yz*", "abcxxxyz")); assertTrue(RegexUtils.wildcardMatch("abc*xxx?yz*", "abcxxxyz")); } @Test public void testWildMatch() { assertTrue(RegexUtils.wildcardMatch("*", "toto")); assertFalse(RegexUtils.wildcardMatch("toto.java", "tutu.java")); assertFalse(RegexUtils.wildcardMatch("12345", "1234")); assertFalse(RegexUtils.wildcardMatch("*f", "")); assertTrue(RegexUtils.wildcardMatch("***", "toto")); assertFalse(RegexUtils.wildcardMatch("*.java", "toto.")); assertFalse(RegexUtils.wildcardMatch("*.java", "toto.jav")); assertTrue(RegexUtils.wildcardMatch("*.java", "toto.java")); assertFalse(RegexUtils.wildcardMatch("abc*", "")); assertTrue(RegexUtils.wildcardMatch("a*c", "abbbbbccccc")); assertTrue(RegexUtils.wildcardMatch("abc*xyz", "abcxxxyz")); assertTrue(RegexUtils.wildcardMatch("*xyz", "abcxxxyz")); assertTrue(RegexUtils.wildcardMatch("abc**xyz", "abcxxxyz")); assertTrue(RegexUtils.wildcardMatch("abc**x", "abcxxx")); assertTrue(RegexUtils.wildcardMatch("*a*b*c**x", "aaabcxxx")); assertTrue(RegexUtils.wildcardMatch("abc*x*yz", "abcxxxyz")); assertTrue(RegexUtils.wildcardMatch("a*b*c*x*yf*z*", "aabbccxxxeeyffz")); assertFalse(RegexUtils.wildcardMatch("a*b*c*x*yf*zze", "aabbccxxxeeyffz")); assertTrue(RegexUtils.wildcardMatch("a*b*c*x*yf*z", "aabbccxxxeeyffz")); assertTrue(RegexUtils.wildcardMatch("a*b*c*x*yf*ze", "aabbccxxxeeyfze")); assertTrue(RegexUtils.wildcardMatch("*LogServerInterface*.java", "_LogServerInterfaceImpl.java")); assertTrue(RegexUtils.wildcardMatch("*Log*Impl.java", "_LogServerInterfaceImpl.java")); assertTrue(RegexUtils.wildcardMatch("abc*xyz", "abcxyxyz")); String str = this.getClass().getName() ; assertTrue(RegexUtils.wildcardMatch("com.oocllogistics.comp.*", str)); assertTrue(RegexUtils.wildcardMatch(RegexUtilsTest.class.getName(), str)); assertTrue(RegexUtils.wildcardMatch("*.RegexUtilsTest", str)); assertTrue(RegexUtils.wildcardMatch("*com.oocllogistics.comp.util.RegexUtilsTest", str)); assertTrue(RegexUtils.wildcardMatch("*.comp.*", str)); assertFalse(RegexUtils.wildcardMatch("comp.*", str)); assertFalse(RegexUtils.wildcardMatch("*.RegexUtils", str)); assertTrue(RegexUtils.wildcardMatch("com.*.comp.*", str)); assertTrue(RegexUtils.wildcardMatch("com.*.RegexUtilsTest", str)); assertTrue(RegexUtils.wildcardMatch("com.*.RegexUtilsTest*", str)); assertTrue(RegexUtils.wildcardMatch("*com.*.RegexUtilsTest", str)); assertTrue(RegexUtils.wildcardMatch("*com.*.*UtilsTest*", str)); assertTrue(RegexUtils.wildcardMatch("*com**UtilsTest*", str)); assertTrue(RegexUtils.wildcardMatch("**com**Utils**", str)); assertFalse(RegexUtils.wildcardMatch("com.", str)); } @Test public void tesstSimpleWildcardMatch() { assertTrue(RegexUtils.simpleWildcardMatch("*", "toto")); assertFalse(RegexUtils.simpleWildcardMatch("toto.java", "tutu.java")); assertFalse(RegexUtils.simpleWildcardMatch("12345", "1234")); assertFalse(RegexUtils.simpleWildcardMatch("*f", "")); assertTrue(RegexUtils.simpleWildcardMatch("***", "toto")); assertFalse(RegexUtils.simpleWildcardMatch("*.java", "toto.")); assertFalse(RegexUtils.simpleWildcardMatch("*.java", "toto.jav")); assertTrue(RegexUtils.simpleWildcardMatch("*.java", "toto.java")); assertFalse(RegexUtils.simpleWildcardMatch("abc*", "")); assertTrue(RegexUtils.simpleWildcardMatch("a*c", "abbbbbccccc")); assertTrue(RegexUtils.simpleWildcardMatch("abc*xyz", "abcxxxyz")); assertTrue(RegexUtils.simpleWildcardMatch("*xyz", "abcxxxyz")); assertTrue(RegexUtils.simpleWildcardMatch("abc**xyz", "abcxxxyz")); assertTrue(RegexUtils.simpleWildcardMatch("abc**x", "abcxxx")); assertTrue(RegexUtils.simpleWildcardMatch("*a*b*c**x", "aaabcxxx")); assertTrue(RegexUtils.simpleWildcardMatch("abc*x*yz", "abcxxxyz")); assertTrue(RegexUtils.simpleWildcardMatch("a*b*c*x*yf*z*", "aabbccxxxeeyffz")); assertFalse(RegexUtils.simpleWildcardMatch("a*b*c*x*yf*zze", "aabbccxxxeeyffz")); assertTrue(RegexUtils.simpleWildcardMatch("a*b*c*x*yf*z", "aabbccxxxeeyffz")); assertTrue(RegexUtils.simpleWildcardMatch("a*b*c*x*yf*ze", "aabbccxxxeeyfze")); assertTrue(RegexUtils.simpleWildcardMatch("*LogServerInterface*.java", "_LogServerInterfaceImpl.java")); assertTrue(RegexUtils.simpleWildcardMatch("*Log*Impl.java", "_LogServerInterfaceImpl.java")); assertTrue(RegexUtils.simpleWildcardMatch("abc*xyz", "abcxyxyz")); String str = this.getClass().getName() ; assertTrue(RegexUtils.simpleWildcardMatch("com.oocllogistics.comp.*", str)); assertTrue(RegexUtils.simpleWildcardMatch(RegexUtilsTest.class.getName(), str)); assertTrue(RegexUtils.simpleWildcardMatch("*.RegexUtilsTest", str)); assertTrue(RegexUtils.simpleWildcardMatch("*com.oocllogistics.comp.util.RegexUtilsTest", str)); assertTrue(RegexUtils.simpleWildcardMatch("*.comp.*", str)); assertFalse(RegexUtils.simpleWildcardMatch("comp.*", str)); assertFalse(RegexUtils.simpleWildcardMatch("*.RegexUtils", str)); assertTrue(RegexUtils.simpleWildcardMatch("com.*.comp.*", str)); assertTrue(RegexUtils.simpleWildcardMatch("com.*.RegexUtilsTest", str)); assertTrue(RegexUtils.simpleWildcardMatch("com.*.RegexUtilsTest*", str)); assertTrue(RegexUtils.simpleWildcardMatch("*com.*.RegexUtilsTest", str)); assertTrue(RegexUtils.simpleWildcardMatch("*com.*.*UtilsTest*", str)); assertTrue(RegexUtils.simpleWildcardMatch("*com**UtilsTest*", str)); assertTrue(RegexUtils.simpleWildcardMatch("**com**Utils**", str)); assertFalse(RegexUtils.simpleWildcardMatch("com.", str)); } public void performanceTestWildcardMatch() { int loop = 1000 ; long start = System.currentTimeMillis(); for (int i = 0; i < loop; i++) { testWildMatch(); } long end = System.currentTimeMillis(); System.out.println("Loop[1000] cost : " + (end - start) / 1000d + " seconds"); loop = 5000 ; start = System.currentTimeMillis(); for (int i = 0; i < loop; i++) { testWildMatch(); } end = System.currentTimeMillis(); System.out.println("Loop[5000] cost : " + (end - start) / 1000d + " seconds"); loop = 10000 ; start = System.currentTimeMillis(); for (int i = 0; i < loop; i++) { testWildMatch(); } end = System.currentTimeMillis(); System.out.println("Loop[10000] cost : " + (end - start) / 1000d + " seconds"); } public void performanceTestSimpleWildcardMatch() { int loop = 1000 ; long start = System.currentTimeMillis(); for (int i = 0; i < loop; i++) { tesstSimpleWildcardMatch(); } long end = System.currentTimeMillis(); // log.info("cost : "+(end - start)/1000d + " seconds"); System.out.println("Loop[1000] cost : " + (end - start) / 1000d + " seconds"); loop = 5000 ; start = System.currentTimeMillis(); for (int i = 0; i < loop; i++) { tesstSimpleWildcardMatch(); } end = System.currentTimeMillis(); System.out.println("Loop[5000] cost : " + (end - start) / 1000d + " seconds"); loop = 10000 ; start = System.currentTimeMillis(); for (int i = 0; i < loop; i++) { tesstSimpleWildcardMatch(); } end = System.currentTimeMillis(); System.out.println("Loop[10000] cost : " + (end - start) / 1000d + " seconds"); } @Test public void performanceTest() { System.out.println("------Test wildcardMatch---------"); performanceTestWildcardMatch(); System.out.println(); System.out.println("------Test simpleWildcardMatch---------"); performanceTestSimpleWildcardMatch(); } } 其中PerformanceTest方法分别对wildcardMatch喝simpleWildcardMatch进行了1000,5000,10000次重复运行Test Case比较,其中每个case包括38个匹配断言。下面是性能测试结果:
------Test wildcardMatch--------- Loop[1000] cost : 0.703 seconds Loop[5000] cost : 2.547 seconds Loop[10000] cost : 4.907 seconds ------Test simpleWildcardMatch--------- Loop[1000] cost : 0.094 seconds Loop[5000] cost : 0.25 seconds Loop[10000] cost : 0.484 seconds
我们可以看到,使用simpleWildCardMatch要比wildcardMatch方法快上一个数量级。 所以如果有朋友有兴趣,建议使用只支持*号的simpleWildcardMatch。(当然了,实际应用中,这种性能差别其实对application影响并不大) 如果有网友有兴趣,可以尝试优化,或者提供更多的测试。 声明:ITeye文章版权属于作者,受法律保护。没有作者书面许可不得转载。
推荐链接
|
|
返回顶楼 | |