截取中英文混合字符串

senilon

浏览: 16096 次
来自: 重庆（目前漂在深圳）

最近访客 | 更多访客>>

qiuye402

babyrjw

jf_linux

stab_roc

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

JAVA测试文章

算法 J#

避免截取中英文混合字符串时出现乱码情况（在 GB2312/GBK 编码下，中文占两个字节，英文占一个字节）

/**
 * Cuts a sub-range out of a GB2312-encoded byte array without splitting a
 * two-byte Chinese character in half, so that the result can always be decoded
 * back into readable text.
 *
 * <p>The algorithm relies on the GB2312 layout: ASCII characters occupy one
 * byte with the high bit clear, Chinese characters occupy two bytes whose first
 * byte has the high bit set. It does not work on UTF-8 data, where Chinese
 * characters occupy three bytes.
 */
public class JustTest {

    /**
     * Demo entry point: cuts bytes [2, 6) out of a mixed Chinese/ASCII string
     * and prints the result.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // The charset must be explicit: the platform default is frequently
        // UTF-8, which would break the two-bytes-per-character assumption.
        java.nio.charset.Charset gb2312 = java.nio.charset.Charset.forName("GB2312");
        JustTest j = new JustTest();
        byte[] source = "你好d我是z中文".getBytes(gb2312);
        System.out.println(new String(j.getByteSubStr(source, 2, 6), gb2312));
    }

    /**
     * Tells whether {@code index} falls on a character boundary of the given
     * GB2312 byte array, i.e. whether a complete character starts (or the data
     * ends) exactly at that position. For the GB2312 bytes of "你好", index 1
     * yields {@code false} and index 2 yields {@code true}.
     *
     * @param sourceByte GB2312-encoded bytes
     * @param index      position to test
     * @return {@code true} if {@code index} is a character boundary inside
     *         {@code [0, sourceByte.length]}; {@code false} otherwise, including
     *         for any index outside that range
     * @throws NullPointerException if {@code sourceByte} is null
     */
    public boolean splitBygb2312(byte[] sourceByte, int index) {
        // A position outside the array can never be a boundary. Without this
        // guard a trailing lone lead byte would let the scan overshoot and
        // report length + 1 as a valid boundary.
        if (index < 0 || index > sourceByte.length) {
            return false;
        }
        int pos = 0;
        // Walk character by character from the start until reaching or
        // passing the requested index.
        while (pos < index) {
            // High bit set (negative byte) marks the first byte of a two-byte
            // character; otherwise it is a single-byte ASCII character.
            pos += (sourceByte[pos] < 0) ? 2 : 1;
        }
        // Landing exactly on index means a character starts there; passing it
        // means index points into the middle of a two-byte character.
        return pos == index;
    }

    /**
     * Returns the bytes of {@code sourceByte} in the range
     * {@code [beginIndex, endIndex)}, moving either end one byte towards the
     * start when it would otherwise split a two-byte character.
     *
     * <p>Out-of-range arguments are tolerated rather than rejected: both
     * indices are clamped into {@code [0, sourceByte.length]}, and an empty
     * array is returned when the resulting range is empty or inverted.
     *
     * @param sourceByte GB2312-encoded bytes to cut from
     * @param beginIndex start position (inclusive)
     * @param endIndex   end position (exclusive)
     * @return a new array holding the selected bytes, never {@code null}
     * @throws NullPointerException if {@code sourceByte} is null
     */
    public byte[] getByteSubStr(byte[] sourceByte, int beginIndex,
            int endIndex) {
        int total = sourceByte.length;
        int begin = Math.min(Math.max(beginIndex, 0), total);
        int end = Math.min(Math.max(endIndex, 0), total);

        // If the start lies inside a character, step back to include the whole
        // character. Position 0 is always a boundary, so begin stays >= 0.
        if (!splitBygb2312(sourceByte, begin)) {
            begin--;
        }
        // If the end lies inside a character, step back to exclude the
        // partial character.
        if (!splitBygb2312(sourceByte, end)) {
            end--;
        }

        if (end <= begin) {
            return new byte[0];
        }
        byte[] resultByte = new byte[end - begin];
        System.arraycopy(sourceByte, begin, resultByte, 0, resultByte.length);
        return resultByte;
    }
}