如何显示出Unicode文件中的汉字

0 0

如何显示出Unicode文件中的汉字5

我想把一个包含汉字Unicode编码的文件uni.txt用Java读取出来，并正确显示中文，请问该如何做呢。我使用FileInputStream和DataInputStream都没有读取成功。请大家指点一下。

uni.txt包含这样的内容：
table.rows.length; //\u8868\u683c\u603b\u884c\u6570
tableModel_arr[tib].pg.perPageCount; //\u6bcf\u9875\u8bb0\u5f55\u6570
tableModel_arr[tib].pg.page;//\u5f53\u524d\u7b2c\u51e0\u9875
。。。。。。

我单独用System.out.println("table.rows.length; //\u8868\u683c\u603b\u884c\u6570")能打印汉字;但一旦用输入流就不行了。
问题补充：
**************************************************************
感谢大家，面对大家的热情，如果我不说两句的话，我心里会憋得难受的。

以前我在其它网站提问题，不是很久没有回复，就是答非所问；在JavaEye我也是第一次提问，本是抱着试试的心理，没想到各位真是够哥们，有建议，也有代码，我都不晓得如何感激了。

祝各位前途似锦，祝JavaEye越办越火！

2008年6月24日 08:54

love2java
0
0 0 1

5个答案按时间排序按投票排序

0 0

采纳的答案

代码大概如下：

import java.io.*;

/**
 * Prints a UTF-8 text file to standard output, decoding Java-style escapes
 * (backslash, 'u', four hex digits) and the simple escapes for tab, carriage
 * return, newline and form feed into the characters they stand for.
 */
public class ReadTxtFile {
    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_FILE = "C://uni.txt";

    /**
     * Decodes and prints a file.
     *
     * @param s optional command-line arguments; {@code s[0]}, when present, is
     *          the path of the file to read, otherwise the default path is used
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] s) throws IOException {
        String fileName = (s != null && s.length > 0) ? s[0] : DEFAULT_FILE;
        new ReadTxtFile().readTxtFile(fileName);
    }

    /**
     * Reads {@code fileName} line by line as UTF-8 and prints each decoded line.
     *
     * @param fileName path of the file to read
     * @throws IOException if the file cannot be opened or read
     */
    private void readTxtFile(String fileName) throws IOException {
        FileInputStream fin = new FileInputStream(new File(fileName));
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(fin, "utf-8"));
            String content = reader.readLine();
            while (content != null) {
                char[] c = content.toCharArray();
                System.out.println(loadConvert(c, 0, c.length, new char[c.length]));
                content = reader.readLine();
            }
        } finally {
            // Closing the underlying stream releases the file handle even when
            // reading or decoding fails part-way through.
            fin.close();
        }
    }

    /**
     * Decodes the escapes found in {@code in[off .. off + len)}.
     *
     * @param in       characters to decode
     * @param off      index of the first character to decode
     * @param len      number of characters to decode
     * @param convtBuf scratch buffer for the result; a larger one is allocated
     *                 when it is shorter than {@code len}
     * @return the decoded text
     * @throws IllegalArgumentException if a unicode escape is truncated or
     *                                  contains a non-hexadecimal digit
     */
    private String loadConvert(char[] in, int off, int len, char[] convtBuf) {
        if (convtBuf.length < len) {
            int newLen = len * 2;
            if (newLen < 0) {
                newLen = Integer.MAX_VALUE;
            }
            convtBuf = new char[newLen];
        }
        char[] out = convtBuf;
        int outLen = 0;
        int end = off + len;

        while (off < end) {
            char aChar = in[off++];
            if (aChar != '\\') {
                out[outLen++] = aChar;
                continue;
            }
            if (off >= end) {
                // A backslash that is the very last character escapes nothing;
                // keep it literally instead of indexing past the input.
                out[outLen++] = aChar;
                break;
            }
            aChar = in[off++];
            if (aChar == 'u') {
                if (end - off < 4) {
                    // Fewer than four characters remain, so the escape is cut short.
                    throw new IllegalArgumentException(
                            "Malformed \\uxxxx encoding.");
                }
                int value = 0;
                for (int i = 0; i < 4; i++) {
                    value = (value << 4) + hexValue(in[off++]);
                }
                out[outLen++] = (char) value;
            } else {
                switch (aChar) {
                    case 't':
                        aChar = '\t';
                        break;
                    case 'r':
                        aChar = '\r';
                        break;
                    case 'n':
                        aChar = '\n';
                        break;
                    case 'f':
                        aChar = '\f';
                        break;
                    default:
                        // Any other escaped character stands for itself.
                        break;
                }
                out[outLen++] = aChar;
            }
        }
        return new String(out, 0, outLen);
    }

    /**
     * Returns the numeric value of an ASCII hexadecimal digit.
     *
     * @throws IllegalArgumentException if {@code ch} is not one of 0-9, a-f, A-F
     */
    private static int hexValue(char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        }
        if (ch >= 'a' && ch <= 'f') {
            return 10 + ch - 'a';
        }
        if (ch >= 'A' && ch <= 'F') {
            return 10 + ch - 'A';
        }
        throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
    }
}

2008年6月24日 12:17

温柔一刀
336
0 0 0

添加评论

0 0

楼上的手快...

2008年6月24日 10:12

lggege
1787
0 0 0

添加评论

0 0

去看 Properties.loadConvert (char[] in, int off, int len, char[] convtBuf)方法. 实验后,可用.

// The literal has to contain the escape as six plain characters (a backslash,
// 'u', then 4F60), hence the doubled backslash. With a single backslash the
// compiler decodes the escape itself and loadConvert has nothing left to do.
String s = "\\u4F60";
char[] c = s.toCharArray();
char[] out = new char[c.length];
System.out.println(new Tests().loadConvert(c, 0, c.length, out)); // prints 你 (U+4F60)

Tests.loadConvert就是直接拷贝的Properties.loadConvert方法。

2008年6月24日 10:11

lggege
1787
0 0 0

添加评论

0 0

loadConvert作用是转换编码

2008年6月24日 10:07

jasongreen
315
0 0 0

添加评论

0 0

参考java.util.Properties

	/**
	 * Reads key/value pairs from {@code inStream}, decoding its bytes with
	 * {@code encoding}, and stores each pair through {@code put}.
	 *
	 * <p>Each logical line returned by {@code LineReader.readLine()} is split
	 * into a key and a value. The key ends at the first unescaped {@code '='},
	 * {@code ':'}, space, tab or form feed. Whitespace around the separator is
	 * skipped. Both parts are passed through {@code loadConvert} before being
	 * stored. The reader is closed before this method returns, whether it
	 * completes normally or throws.
	 *
	 * @param inStream the byte stream to read from
	 * @param encoding name of the charset used to decode {@code inStream}
	 * @throws IOException if reading from or closing the stream fails
	 */
	public synchronized void load(InputStream inStream,String encoding) throws IOException {
		char[] convtBuf = new char[1024];
		LineReader lr = new LineReader(inStream,encoding);

		try {
			int limit;
			while ((limit = lr.readLine()) >= 0) {
				char c;
				int keyLen = 0;
				int valueStart = limit;
				boolean hasSep = false;
				boolean precedingBackslash = false;

				// Scan for the end of the key: the first separator or
				// whitespace character that is not escaped by a backslash.
				while (keyLen < limit) {
					c = lr.lineBuf[keyLen];
					if ((c == '=' || c == ':') && !precedingBackslash) {
						valueStart = keyLen + 1;
						hasSep = true;
						break;
					} else if ((c == ' ' || c == '\t' || c == '\f')
							&& !precedingBackslash) {
						valueStart = keyLen + 1;
						break;
					}
					// Two consecutive backslashes cancel each other out.
					if (c == '\\') {
						precedingBackslash = !precedingBackslash;
					} else {
						precedingBackslash = false;
					}
					keyLen++;
				}

				// Skip whitespace before the value and, if the key ended on
				// whitespace, at most one '=' or ':' separator.
				while (valueStart < limit) {
					c = lr.lineBuf[valueStart];
					if (c != ' ' && c != '\t' && c != '\f') {
						if (!hasSep && (c == '=' || c == ':')) {
							hasSep = true;
						} else {
							break;
						}
					}
					valueStart++;
				}

				String key = loadConvert(lr.lineBuf, 0, keyLen, convtBuf);
				String value = loadConvert(lr.lineBuf, valueStart, limit
						- valueStart, convtBuf);
				put(key, value);
			}
		} finally {
			// Release the stream even when reading, decoding or storing fails.
			// The null check keeps a missing reader from masking the original
			// exception with a NullPointerException.
			if (lr.reader != null) {
				lr.reader.close();
			} else {
				inStream.close();
			}
		}
	}

	/**
	 * Reads one "logical line" at a time from a byte stream decoded with a
	 * caller-supplied charset.
	 *
	 * <p>Comment lines (starting with {@code '#'} or {@code '!'}) and blank
	 * lines are skipped. Leading spaces, tabs and form feeds of every natural
	 * line are dropped. A natural line ending in an odd number of backslashes
	 * is joined with the following line. {@link #readLine()} returns the
	 * length of the logical line and leaves its characters in {@code lineBuf}.
	 */
	class LineReader {
		/** Characters most recently read from {@code reader}. */
		char[] inBuf = new char[8192];
		/** Characters of the current logical line; grown on demand. */
		char[] lineBuf = new char[1024];
		/** Number of valid characters in {@code inBuf}. */
		int inLimit = 0;
		/** Index of the next unread character in {@code inBuf}. */
		int inOff = 0;
		BufferedReader reader ;

		/**
		 * @param inStream the byte stream to read from
		 * @param encoding name of the charset used to decode {@code inStream}
		 * @throws IllegalArgumentException if {@code encoding} is not supported
		 */
		public LineReader(InputStream inStream,String encoding) {
			try {
				this.reader = new BufferedReader(new InputStreamReader(inStream,encoding));
			} catch (UnsupportedEncodingException e) {
				// Fail here with the real cause. Swallowing the exception
				// would leave reader null and postpone the failure to a
				// NullPointerException on the first readLine() call.
				throw new IllegalArgumentException("Unsupported encoding: " + encoding, e);
			}
		}

		/**
		 * Reads the next logical line into {@code lineBuf}.
		 *
		 * @return the number of characters stored in {@code lineBuf}, or
		 *         {@code -1} when the input is exhausted
		 * @throws IOException if reading from the underlying reader fails
		 */
		int readLine() throws IOException {
			int len = 0;
			char c = 0;

			boolean skipWhiteSpace = true;
			boolean isCommentLine = false;
			boolean isNewLine = true;
			boolean appendedLineBegin = false;
			boolean precedingBackslash = false;
			boolean skipLF = false;
			while (true) {
				// Refill the input buffer once it has been consumed.
				if (inOff >= inLimit) {
					inLimit = reader.read(inBuf);
					inOff = 0;
					if (inLimit <= 0) {
						if (len == 0 || isCommentLine) {
							return -1;
						}
						return len;
					}
				}

				c = inBuf[inOff++];
				// After a continuation that ended in CR, swallow the LF of a
				// CR LF pair.
				if (skipLF) {
					skipLF = false;
					if (c == '\n') {
						continue;
					}
				}
				if (skipWhiteSpace) {
					if (c == ' ' || c == '\t' || c == '\f') {
						continue;
					}
					// Blank lines are skipped, except directly after a
					// continuation, where the line terminator ends the
					// logical line.
					if (!appendedLineBegin && (c == '\r' || c == '\n')) {
						continue;
					}
					skipWhiteSpace = false;
					appendedLineBegin = false;
				}
				if (isNewLine) {
					isNewLine = false;
					if (c == '#' || c == '!') {
						isCommentLine = true;
						continue;
					}
				}

				if (c != '\n' && c != '\r') {
					lineBuf[len++] = c;
					// Double the line buffer when it is full.
					if (len == lineBuf.length) {
						int newLength = lineBuf.length * 2;
						if (newLength < 0) {
							newLength = Integer.MAX_VALUE;
						}
						char[] buf = new char[newLength];
						System.arraycopy(lineBuf, 0, buf, 0, lineBuf.length);
						lineBuf = buf;
					}
					// flip the preceding backslash flag
					if (c == '\\') {
						precedingBackslash = !precedingBackslash;
					} else {
						precedingBackslash = false;
					}
				} else {
					// reached EOL
					if (isCommentLine || len == 0) {
						// Discard what was collected and start over on the
						// next natural line.
						isCommentLine = false;
						isNewLine = true;
						skipWhiteSpace = true;
						len = 0;
						continue;
					}
					if (inOff >= inLimit) {
						inLimit = reader.read(inBuf);
						inOff = 0;
						if (inLimit <= 0) {
							return len;
						}
					}
					if (precedingBackslash) {
						// Line continuation: drop the trailing backslash and
						// skip the leading whitespace of the following line.
						len -= 1;
						skipWhiteSpace = true;
						appendedLineBegin = true;
						precedingBackslash = false;
						if (c == '\r') {
							skipLF = true;
						}
					} else {
						return len;
					}
				}
			}
		}
	}

	/**
	 * Converts encoded &#92;uxxxx sequences in {@code in[off .. off + len)} to
	 * unicode chars and changes special saved chars (escaped t, r, n, f) to
	 * their original forms. Any other escaped character stands for itself.
	 *
	 * <p>Nothing beyond {@code off + len} is read, even when {@code in} is a
	 * larger buffer: a backslash that is the last character of the range is
	 * kept literally, and an escape with fewer than four characters left in
	 * the range is rejected.
	 *
	 * @param in       buffer holding the characters to decode
	 * @param off      index of the first character to decode
	 * @param len      number of characters to decode
	 * @param convtBuf scratch buffer for the result; a larger one is allocated
	 *                 when it is shorter than {@code len}
	 * @return the decoded text
	 * @throws IllegalArgumentException if a unicode escape is truncated or
	 *                                  contains a non-hexadecimal digit
	 */
	private String loadConvert(char[] in, int off, int len, char[] convtBuf) {
		if (convtBuf.length < len) {
			int newLen = len * 2;
			if (newLen < 0) {
				newLen = Integer.MAX_VALUE;
			}
			convtBuf = new char[newLen];
		}
		char aChar;
		char[] out = convtBuf;
		int outLen = 0;
		int end = off + len;

		while (off < end) {
			aChar = in[off++];
			if (aChar != '\\') {
				out[outLen++] = aChar;
				continue;
			}
			if (off >= end) {
				// Dangling backslash at the end of the range: there is
				// nothing to escape, so keep it as-is.
				out[outLen++] = aChar;
				break;
			}
			aChar = in[off++];
			if (aChar == 'u') {
				if (end - off < 4) {
					// The four hex digits would extend past the range.
					throw new IllegalArgumentException(
							"Malformed \\uxxxx encoding.");
				}
				// Read the xxxx
				int value = 0;
				for (int i = 0; i < 4; i++) {
					aChar = in[off++];
					if (aChar >= '0' && aChar <= '9') {
						value = (value << 4) + aChar - '0';
					} else if (aChar >= 'a' && aChar <= 'f') {
						value = (value << 4) + 10 + aChar - 'a';
					} else if (aChar >= 'A' && aChar <= 'F') {
						value = (value << 4) + 10 + aChar - 'A';
					} else {
						throw new IllegalArgumentException(
								"Malformed \\uxxxx encoding.");
					}
				}
				out[outLen++] = (char) value;
			} else {
				if (aChar == 't') {
					aChar = '\t';
				} else if (aChar == 'r') {
					aChar = '\r';
				} else if (aChar == 'n') {
					aChar = '\n';
				} else if (aChar == 'f') {
					aChar = '\f';
				}
				out[outLen++] = aChar;
			}
		}
		return new String(out, 0, outLen);
	}

2008年6月24日 10:05

jasongreen
315
0 0 0