java检测html是否闭合

vtrtbb

浏览: 365080 次
性别:
来自: 北京

最近访客更多访客>>

u012363178

ganxueyun

xx5333

wsl455586841

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

java

HTML Java .net Blog

import java.util.Arrays;

/**
 * Minimal growable list of tag names backed by a {@code String} array.
 *
 * <p>Removing an entry only sets its slot to {@code null}: elements are never
 * shifted and {@link #size()} never decreases, so callers that iterate the
 * list must skip {@code null} slots.
 */
class TagsList
{
    private String[] data;
    private int size = 0;

    /**
     * Creates a list with the given initial capacity.
     *
     * @param size initial capacity of the backing array (not an element count)
     */
    public TagsList(int size)
    {
        data = new String[size];
    }

    /** Creates a list with an initial capacity of 10. */
    public TagsList()
    {
        this(10);
    }

    /**
     * Appends an entry, growing the backing array when it is full.
     *
     * @param str the entry to append
     */
    public void add(String str)
    {
        ensureCapacity(size + 1);
        data[size++] = str;
    }

    /**
     * Returns the entry stored at {@code index}.
     *
     * @param index zero-based slot number
     * @return the stored entry, or {@code null} when the slot was removed or
     *         {@code index} lies outside {@code [0, size())}
     */
    public String get(int index)
    {
        if (index < 0 || index >= size) {
            return null;
        }
        return data[index];
    }

    /**
     * Clears the most recently added slot whose value equals {@code str}.
     *
     * <p>For efficiency the slot is only set to {@code null}; nothing is
     * shifted. The search runs from the end so that, when the same tag name
     * was opened several times, the innermost (latest) opener is the one that
     * gets matched and the remaining entries keep their nesting order.
     *
     * @param str value to look for; {@code null} never matches
     * @return {@code true} if a slot was cleared
     */
    public boolean remove(String str)
    {
        if (str == null) {
            return false;
        }
        for (int index = size - 1; index >= 0; index--) {
            if (str.equals(data[index])) {
                data[index] = null;
                return true;
            }
        }
        return false;
    }

    /**
     * Clears the slot at {@code index} by setting it to {@code null}.
     *
     * @param index zero-based slot number
     * @return {@code true} if {@code index} lies inside {@code [0, size())},
     *         {@code false} otherwise
     */
    public boolean remove(int index)
    {
        // Bounds are checked against size, not the array capacity, so unused
        // trailing capacity is never reported as a successful removal.
        if (index < 0 || index >= size) {
            return false;
        }
        data[index] = null;
        return true;
    }

    /**
     * Returns the number of slots ever added, including slots cleared by
     * either {@code remove} method.
     *
     * @return the slot count
     */
    public int size()
    {
        return this.size;
    }

    /**
     * Grows the backing array so it can hold at least {@code minSize} slots.
     * The new capacity is the larger of {@code minSize} and 1.5x the old
     * capacity plus one.
     *
     * @param minSize required minimum capacity
     */
    public void ensureCapacity(int minSize)
    {
        int oldCapacity = data.length;
        if (minSize > oldCapacity) {
            int grown = oldCapacity + oldCapacity / 2 + 1;
            data = Arrays.copyOf(data, Math.max(grown, minSize));
        }
    }
}

/**
 * Checks whether the tags in an HTML fragment are balanced and can add the
 * missing opening/closing tags.
 *
 * <p>The scanner is deliberately simple: it only pairs tag names. Quoted
 * attribute values are skipped so that {@code <} or {@code >} inside them is
 * not mistaken for markup. Comments, script/style content and void elements
 * are not special-cased, so tag-like text inside them is still counted.
 */
public class TagsChecker
{
    /** Index of the list holding closing tags that had no earlier opener. */
    private static final int UNOPENED = 0;
    /** Index of the list holding opening tags (null slots = already closed). */
    private static final int UNCLOSED = 1;

    /**
     * Tells whether every tag in {@code str} is paired.
     *
     * @param str the HTML fragment to inspect
     * @return {@code true} when no closing tag lacks an opener and no opening
     *         tag lacks a closer
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static boolean check(String str)
    {
        TagsList[] unclosedTags = getUnclosedTags(str);

        if (unclosedTags[UNOPENED].size() != 0) {
            return false;
        }
        // Closed openers are left behind as null slots; any non-null slot is
        // an opener that never met its closing tag.
        TagsList unclosed = unclosedTags[UNCLOSED];
        for (int i = 0; i < unclosed.size(); i++) {
            if (unclosed.get(i) != null) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns {@code str} with the missing opening tags prepended and the
     * missing closing tags appended.
     *
     * @param str the HTML fragment to repair
     * @return the repaired fragment; identical to {@code str} when nothing is
     *         missing
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String fix(String str)
    {
        TagsList[] unclosedTags = getUnclosedTags(str);
        StringBuilder fixed = new StringBuilder(str.length() + 16);

        // Openers are emitted in reverse order of the orphan closers so that
        // the first orphan closer ends up matching the innermost new opener.
        TagsList unopened = unclosedTags[UNOPENED];
        for (int i = unopened.size() - 1; i >= 0; i--) {
            fixed.append('<').append(unopened.get(i)).append('>');
        }

        fixed.append(str);

        // Closers are emitted innermost-first; null slots were already closed.
        TagsList unclosed = unclosedTags[UNCLOSED];
        for (int i = unclosed.size() - 1; i >= 0; i--) {
            String name = unclosed.get(i);
            if (name != null) {
                fixed.append("</").append(name).append('>');
            }
        }

        return fixed.toString();
    }

    /**
     * Scans {@code str} and collects the unpaired tags.
     *
     * <p>Rules applied while scanning:
     * <ul>
     *   <li>Quotes in plain text are ordinary characters; only quotes inside
     *       an opening tag delimit attribute values.</li>
     *   <li>A {@code <} (or {@code </}) that is not followed by a letter is
     *       treated as literal text.</li>
     *   <li>A tag name ends at the first whitespace character or {@code >}.</li>
     *   <li>An opening tag whose {@code >} is directly preceded by {@code /}
     *       is self-closing and is not recorded.</li>
     *   <li>An opening tag with no terminating {@code >} is ignored; a closing
     *       tag with no terminating {@code >} is still recorded under its full
     *       name.</li>
     * </ul>
     *
     * @param str the HTML fragment to scan
     * @return a two-element array: index 0 holds closing tags without an
     *         earlier opener, index 1 holds opening tags, with already-closed
     *         ones replaced by {@code null}
     */
    private static TagsList[] getUnclosedTags(String str)
    {
        TagsList[] unclosedTags = new TagsList[2];
        unclosedTags[UNOPENED] = new TagsList(); // e.g. </div> with no <div> before it
        unclosedTags[UNCLOSED] = new TagsList(); // e.g. <div> with no </div> after it

        final int length = str.length();
        int i = 0;
        while (i < length) {
            if (str.charAt(i++) != '<') {
                continue; // plain text
            }

            boolean closing = i < length && str.charAt(i) == '/';
            int nameStart = closing ? i + 1 : i;
            if (nameStart >= length || !Character.isLetter(str.charAt(nameStart))) {
                continue; // "<" not followed by a tag name: literal text
            }

            int nameEnd = nameStart;
            while (nameEnd < length && !endsTagName(str.charAt(nameEnd))) {
                nameEnd++;
            }
            String name = str.substring(nameStart, nameEnd);

            if (closing) {
                // Pair with an earlier opener, or remember it as an orphan.
                if (!unclosedTags[UNCLOSED].remove(name)) {
                    unclosedTags[UNOPENED].add(name);
                }
                int gt = str.indexOf('>', nameEnd);
                i = gt < 0 ? length : gt + 1;
            }
            else {
                int gt = findTagEnd(str, nameEnd);
                if (gt < 0) {
                    break; // tag never terminated: nothing left to parse
                }
                if (str.charAt(gt - 1) != '/') { // skip <TagName />
                    unclosedTags[UNCLOSED].add(name);
                }
                i = gt + 1;
            }
        }
        return unclosedTags;
    }

    /** Tells whether {@code c} terminates a tag name. */
    private static boolean endsTagName(char c)
    {
        return c == '>' || Character.isWhitespace(c);
    }

    /**
     * Finds the {@code >} that ends an opening tag, skipping over single- or
     * double-quoted attribute values.
     *
     * @param str  the text being scanned
     * @param from index at which to start looking (just past the tag name)
     * @return index of the terminating {@code >}, or {@code -1} when the tag
     *         or one of its quoted values is never terminated
     */
    private static int findTagEnd(String str, int from)
    {
        final int length = str.length();
        int i = from;
        while (i < length) {
            char c = str.charAt(i);
            if (c == '>') {
                return i;
            }
            if (c == '"' || c == '\'') {
                int closeQuote = str.indexOf(c, i + 1);
                if (closeQuote < 0) {
                    return -1;
                }
                i = closeQuote + 1;
            }
            else {
                i++;
            }
        }
        return -1;
    }
}

/**
 * Small console driver for {@code TagsChecker}: runs a functional check on two
 * sample fragments and then a rough timing run on a much longer input.
 */
public class Test
{
    /**
     * Prints the check/fix results for the sample fragments, then the time
     * taken to check and fix a long input.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        System.out.println("--功能测试--");
        // The double quotes inside the samples must be escaped; unescaped they
        // terminate the string literal and the class does not compile.
        String str1 = "tt</u>ss</a>aa<div name=\"<test>\" id='3' other='<test>'><b>sff";
        String str2 = "tt<u>ss</u><div id=test name=\"<test>\"><a>fds</a></div>";
        System.out.println("检查文本 " + str1);
        System.out.println("结果：" + TagsChecker.check(str1));
        System.out.println("检查文本 " + str2);
        System.out.println("结果：" + TagsChecker.check(str2));
        System.out.println("修复文本 " + str1);
        System.out.println("结果：" + TagsChecker.fix(str1));

        // Double the sample ten times (1024x its original length) to get an
        // input long enough for the timing run.
        for (int i = 0; i < 10; i++) {
            str1 += str1;
        }

        System.out.println();
        System.out.println("--效率测试--");
        System.out.println("文本长度：" + str1.length());
        long t1 = System.currentTimeMillis();
        boolean closed = TagsChecker.check(str1);
        long t2 = System.currentTimeMillis();
        TagsChecker.fix(str1); // result is not needed, only the elapsed time
        long t3 = System.currentTimeMillis();
        System.out.println("检查用时：" + (t2 - t1) + " 毫秒 结果：" + closed);
        System.out.println("修复用时：" + (t3 - t2) + " 毫秒");
    }

}

来自：http://blog.csdn.net/CrazyGou/archive/2007/06/07/1643094.aspx

分享到：

Tomcat6配置ssi | java读取远程文件

2010-04-05 16:52
浏览 1169
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

java检测html是否闭合

评论

发表评论

相关推荐

最近访客 更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论

java检测html是否闭合

评论

发表评论

相关推荐

Maven运行Selenium报错org/w3c/dom/ElementTraversal

logback日志按天滚动记录

spring boot 中用Swagger2 构建API说明文档

SpringMVC测试框架Mock[转载]

maven本地安装jar包

java发消息MSMQ

maven导出依赖jar包到指定目录

hadoop执行hadoop namenode -format错误问题

sql 每个分类取N条数据例子

effective java第一版下载

多线程执行任务

java代码构建线程池

BugFree说明

android 阅读器分页读取字数

linux更新系统时间

jstl 一些函数用法

activeMq 简单的安全设置

activeMq收不到消息

仿Google验证码，自己没有试过

Tomcat 配置数据源

最近访客更多访客>>