Unicode 的转换

chenlk823

浏览: 37848 次
性别:
来自: 武汉

最近访客更多访客>>

hijk123456789

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

转载自：
http://bbs.chinaunix.net/thread-387085-1-1.html

package com.util;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.Date;

/**
 * Utility that walks a directory tree and rewrites every file whose path ends
 * with ".js" in place, replacing backslash-u escape sequences (a backslash, the
 * letter u and exactly four hexadecimal digits) with the characters they denote.
 * Also offers the reverse conversion ({@link #gbEncoding(String)}).
 *
 * <p>Files are read and written with {@link FileReader} / {@link FileWriter},
 * i.e. with the platform default charset. Every line written back is terminated
 * with CR LF, so the line endings of processed files are normalized.
 */
public class Unicode {

        /** Directory processed by {@link #main(String[])} when no argument is given. */
        private static final String DEFAULT_PATH = "E:\\taobao";

        /**
         * Entry point.
         *
         * @param args optional; {@code args[0]} is the directory (or file) to
         *             process. Falls back to the historical default directory
         *             when absent.
         */
        public static void main(String[] args) {
                Unicode unicode = new Unicode();
                String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
                unicode.toIndex(path);
        }

        /**
         * Processes the file or directory tree rooted at the given path.
         *
         * @param path file-system path of the file or directory to process
         */
        public void toIndex(String path) {
                toIndex(new File(path));
        }

        /**
         * Processes the given file or directory tree and prints the elapsed
         * time and the number of files that were rewritten.
         *
         * @param file file or directory to process
         */
        private void toIndex(File file) {
                long start = System.currentTimeMillis();
                int number = indexFiles(file);
                long end = System.currentTimeMillis();
                System.out.println("总共耗时" + (end - start) + "毫秒");
                System.out.println("一共处理" + number + "个文件");
        }

        /**
         * Recursively visits a file or directory and converts every ".js" file
         * found (the suffix check is case-sensitive).
         *
         * @param file file or directory to visit
         * @return number of files that were actually rewritten
         */
        private int indexFiles(File file) {
                if (file.isDirectory()) {
                        File[] files = file.listFiles();
                        if (files == null) {
                                // listFiles() returns null when the directory cannot be
                                // listed (I/O error, missing permission); nothing to do.
                                return 0;
                        }
                        int num = 0;
                        for (int i = 0; i < files.length; i++) {
                                num += indexFiles(files[i]);
                        }
                        return num;
                }
                if (file.getPath().endsWith(".js")) {
                        System.out.println("正在处理：" + file);
                        return unicode(file.getAbsolutePath()) ? 1 : 0;
                }
                System.out.println("文件类型不支持" + file);
                return 0;
        }

        /**
         * Decodes the escape sequences of one file and writes the result back
         * to the same path.
         *
         * <p>The file is only overwritten when it was read completely; a read
         * failure is reported on standard error and leaves the file untouched,
         * so a partially read file is never written back truncated.
         *
         * @param filePath path of the file to convert in place
         * @return {@code true} if the file was rewritten, {@code false} if it
         *         was skipped because reading failed
         */
        private boolean unicode(String filePath) {
                StringBuffer decoded = new StringBuffer();
                try {
                        readDecodedInto(filePath, decoded);
                } catch (IOException e) {
                        System.err.println(e.toString());
                        return false;
                }
                findLog(filePath, decoded.toString());
                return true;
        }

        /**
         * Overwrites the given file with the given text.
         *
         * @param logFile path of the file to (over)write
         * @param logFill text to write
         * @throws RuntimeException if the file cannot be opened, written or
         *                          closed; the underlying {@link IOException}
         *                          is attached as the cause
         */
        public void findLog(String logFile, String logFill) {
                File file = new File(logFile);

                try {
                        BufferedWriter out = new BufferedWriter(new FileWriter(file));
                        try {
                                out.write(logFill);
                        } finally {
                                // Always release the file handle, also when write() fails.
                                out.close();
                        }
                } catch (IOException ex) {
                        throw new RuntimeException("文件读写错误", ex);
                }
        }

        /**
         * Reads a text file line by line, decodes the escape sequences of each
         * line with {@link #decodeUnicode(String)} and joins the lines with
         * CR LF (a CR LF is appended after every line, including the last).
         *
         * <p>Failures are reported on standard error and are not propagated;
         * in that case the text decoded before the failure (possibly empty) is
         * returned.
         *
         * @param filepath path of the file to read
         * @return the decoded file content
         */
        public String findAll(String filepath) {
                StringBuffer stringBuffer = new StringBuffer();
                try {
                        readDecodedInto(filepath, stringBuffer);
                } catch (Exception e) {
                        System.err.println(e.toString());
                }
                return stringBuffer.toString();
        }

        /**
         * Reads a file line by line and appends each decoded line, followed by
         * CR LF, to the target buffer. The reader is closed in all cases.
         *
         * @param filepath path of the file to read
         * @param target   buffer receiving the decoded text
         * @throws IOException if the file cannot be opened, read or closed
         */
        private static void readDecodedInto(String filepath, StringBuffer target)
                        throws IOException {
                BufferedReader reader = new BufferedReader(new FileReader(filepath));
                try {
                        String line = reader.readLine();
                        while (line != null) {
                                target.append(decodeUnicode(line).toString());
                                target.append("\r\n");
                                line = reader.readLine();
                        }
                } finally {
                        reader.close();
                }
        }

        /**
         * Replaces every backslash-u escape sequence (backslash, letter u and
         * exactly four hexadecimal digits) in the input with the UTF-16 code
         * unit it denotes. All other text is copied unchanged.
         *
         * <p>A backslash-u that is not followed by four hexadecimal digits
         * (truncated or malformed escape) is copied to the output as is instead
         * of raising an exception.
         *
         * @author javajohn
         * @param dataStr text that may contain escape sequences; may be
         *                {@code null}
         * @return a new buffer holding the decoded text; empty when
         *         {@code dataStr} is {@code null} or empty
         */
        public static StringBuffer decodeUnicode(final String dataStr) {
                final StringBuffer buffer = new StringBuffer();
                if (dataStr == null) {
                        return buffer;
                }

                final int length = dataStr.length();
                int pos = 0;
                while (pos < length) {
                        final int escape = dataStr.indexOf("\\u", pos);
                        if (escape == -1) {
                                // No further escape: copy the remaining text and stop.
                                buffer.append(dataStr.substring(pos));
                                break;
                        }
                        // Copy the literal text in front of the escape.
                        buffer.append(dataStr.substring(pos, escape));

                        final int codeUnit = parseHex4(dataStr, escape + 2);
                        if (codeUnit >= 0) {
                                buffer.append((char) codeUnit);
                                pos = escape + 6;
                        } else {
                                // Malformed escape: keep the two marker characters
                                // and continue scanning right after them.
                                buffer.append("\\u");
                                pos = escape + 2;
                        }
                }
                return buffer;
        }

        /**
         * Parses exactly four hexadecimal digits starting at the given index.
         *
         * @param text  text to read from
         * @param start index of the first digit
         * @return the parsed value (0 to 0xFFFF), or -1 if fewer than four
         *         characters remain or one of them is not a hexadecimal digit
         */
        private static int parseHex4(final String text, final int start) {
                if (start + 4 > text.length()) {
                        return -1;
                }
                int value = 0;
                for (int i = start; i < start + 4; i++) {
                        final int digit = Character.digit(text.charAt(i), 16);
                        if (digit < 0) {
                                return -1;
                        }
                        value = (value << 4) | digit;
                }
                return value;
        }

        /**
         * Encodes the value with {@link #gbEncoding(String)} and writes it to
         * the stream as a 4-byte length (see {@link DataOutputStream#writeInt})
         * followed by that many bytes. The length and the original value are
         * also printed on standard output.
         *
         * <p>An {@link IOException} is reported on standard error and is not
         * propagated.
         *
         * @param out   stream to write to
         * @param value text to encode and write
         */
        public static void writeUnicode(final DataOutputStream out,
                        final String value) {
                try {
                        final String unicode = gbEncoding(value);
                        // The encoded form only contains a backslash, the letter u and
                        // hexadecimal digits, so US-ASCII is lossless and, unlike the
                        // platform default charset, gives the same bytes everywhere.
                        final byte[] data = unicode.getBytes("US-ASCII");
                        final int dataLength = data.length;

                        System.out.println(" Data Length is: " + dataLength);
                        System.out.println(" Data is: " + value);
                        out.writeInt(dataLength); // length prefix first
                        out.write(data, 0, dataLength); // then the encoded text
                } catch (IOException e) {
                        System.err.println(e.toString());
                }
        }

        /**
         * Converts every UTF-16 code unit of the input into a backslash-u
         * escape sequence with exactly four lowercase hexadecimal digits, so
         * that {@link #decodeUnicode(String)} restores the original text.
         *
         * @param gbString text to encode; must not be {@code null}
         * @return the escaped text; empty for an empty input
         * @throws NullPointerException if {@code gbString} is {@code null}
         */
        public static String gbEncoding(final String gbString) {
                final char[] chars = gbString.toCharArray();
                final StringBuffer encoded = new StringBuffer(chars.length * 6);
                for (int i = 0; i < chars.length; i++) {
                        final String hex = Integer.toHexString(chars[i]);
                        encoded.append("\\u");
                        // Left-pad with zeros to the four digits the escape requires.
                        for (int pad = hex.length(); pad < 4; pad++) {
                                encoded.append('0');
                        }
                        encoded.append(hex);
                }
                return encoded.toString();
        }

}

分享到：