过滤HTML部分

jinxhj2003

浏览: 150146 次
性别:
来自: 南昌

最近访客更多访客>>

tom2139779

java113096

项志鹏同學

wellxu万岁

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

java

HTML Java

package cn.jxsme.util.tool;

import java.util.regex.Pattern;

/*
 * author huangjin green eat
 * Oct 17, 2008
 */
/**
 * String helpers for stripping HTML markup, crudely filtering SQL
 * metacharacters, and converting between plain text and simple HTML.
 *
 * <p>All methods are static and keep no mutable state.
 */
public class FifterHtml {

    /** Matches a complete script element, tolerating whitespace inside the tag delimiters. */
    private static final Pattern SCRIPT_PATTERN = Pattern.compile(
            "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>",
            Pattern.CASE_INSENSITIVE);

    /** Matches a complete style element, tolerating whitespace inside the tag delimiters. */
    private static final Pattern STYLE_PATTERN = Pattern.compile(
            "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>",
            Pattern.CASE_INSENSITIVE);

    /**
     * Matches '<', then any run of characters other than '<', then '>'.
     * The run is greedy, so a match extends to the last '>' that precedes the
     * next '<' (or the end of input); text such as "</b> 3 >" is removed whole.
     */
    private static final Pattern TAG_PATTERN = Pattern.compile(
            "<[.[^<]]*>",
            Pattern.CASE_INSENSITIVE);

    /** Matches a whole line that contains a quote, a semicolon, or a "--" sequence. */
    private static final Pattern SQL_PATTERN = Pattern.compile(".*([';]+|(--)+).*");

    /**
     * Removes script elements, style elements and then all remaining tags
     * from the given HTML, in that order.
     *
     * @param inputString the HTML to strip; may be {@code null}
     * @return the input with all matches removed, or an empty string when
     *         {@code inputString} is {@code null}
     */
    public static String Html2Text(String inputString) {
        if (inputString == null) {
            // Explicit guard instead of relying on a caught NullPointerException.
            return "";
        }
        String stripped = SCRIPT_PATTERN.matcher(inputString).replaceAll("");
        stripped = STYLE_PATTERN.matcher(stripped).replaceAll("");
        return TAG_PATTERN.matcher(stripped).replaceAll("");
    }

    /**
     * Replaces every line that contains a single quote, a semicolon or a
     * "--" sequence with a single space. Lines without those characters are
     * returned unchanged.
     *
     * <p>NOTE(review): this is a blunt filter, not an escaping routine; SQL
     * built from user input should use {@code PreparedStatement} parameters.
     *
     * @param str the text to filter; must not be {@code null}
     * @return the filtered text
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String FifterSQL(String str) {
        return SQL_PATTERN.matcher(str).replaceAll(" ");
    }

    /**
     * Converts plain text to simple HTML: each space becomes {@code &nbsp;}
     * and each "\r\n" or "\n" line break becomes {@code <br/>}.
     *
     * @param input the plain text; may be {@code null}
     * @return the converted text, or {@code input} itself when it is
     *         {@code null} or empty
     */
    public static String TestToHtml(String input) {
        if (input == null || input.length() == 0) {
            return input;
        }
        // NOTE(review): the space replacement was a no-op (" " -> " ") in the
        // code as received; the "&nbsp;" entity appears to have been decoded
        // when the listing was published. Confirm against the original source.
        return input.replace(" ", "&nbsp;")
                .replace("\r\n", "<br/>")
                .replace("\n", "<br/>");
    }

    /**
     * Converts simple HTML back to plain text: {@code &nbsp;} (and the
     * literal no-break space character U+00A0) becomes a space and
     * {@code <br/>} becomes "\r\n".
     *
     * @param input the HTML fragment; may be {@code null}
     * @return the converted text, or {@code input} itself when it is
     *         {@code null} or empty
     */
    public static String HtmlToTest(String input) {
        if (input == null || input.length() == 0) {
            return input;
        }
        // A second "<br/>" -> "\n" pass would never match after the first
        // replacement, so only the "\r\n" conversion is performed.
        return input.replace("&nbsp;", " ")
                .replace("\u00A0", " ")
                .replace("<br/>", "\r\n");
    }
}

分享到：

java工具日期部分 | 生成文件部分

2009-07-14 10:34
浏览 939
评论(0)
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论