`

汉字转码的 Java 实现

    博客分类:
  • java
 
阅读更多
package common;

import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

/**
 * Escapes and unescapes strings using a JavaScript-{@code escape()}-like scheme
 * so that non-ASCII text (for example Chinese characters) can be carried in an
 * ASCII-only channel.
 *
 * <p>Encoding rules applied by {@link #escape(String)}:
 * <ul>
 *   <li>a space becomes {@code '+'};</li>
 *   <li>{@code A-Z a-z 0-9 - _ . ! ~ * ' ( ) @} are copied unchanged;</li>
 *   <li>any other character up to U+007F becomes {@code %XX};</li>
 *   <li>any character above U+007F becomes {@code %uXXXX}.</li>
 * </ul>
 * Hex digits are always written in upper case. The string is processed one
 * UTF-16 code unit at a time, so a surrogate pair is written as two
 * {@code %uXXXX} groups.
 */
public class HanZiCode {

    /** Upper-case hex digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Escapes {@code s} according to the rules described on this class.
     *
     * @param s the text to escape; must not be {@code null}
     * @return the escaped, ASCII-only representation of {@code s}
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String escape(String s) {
        int len = s.length();
        StringBuilder out = new StringBuilder(len);
        for (int i = 0; i < len; i++) {
            char ch = s.charAt(i);
            if (ch == ' ') {
                // space : map to '+'
                out.append('+');
            } else if (isUnreserved(ch)) {
                // letters, digits and unreserved punctuation : as it was
                out.append(ch);
            } else if (ch <= 0x007F) {
                // other ASCII : map to %XX
                out.append('%');
                appendHexByte(out, ch);
            } else {
                // everything else : map to %uXXXX (high byte first)
                out.append('%').append('u');
                appendHexByte(out, ch >>> 8);
                appendHexByte(out, ch & 0x00FF);
            }
        }
        return out.toString();
    }

    /**
     * Reverses {@link #escape(String)}.
     *
     * <p>{@code '+'} becomes a space, {@code %XX} becomes the character with
     * code {@code XX}, and {@code %uXXXX} becomes the UTF-16 code unit
     * {@code XXXX}. Hex digits are accepted in either case; the {@code u}
     * marker must be lower case. Every other character is copied unchanged.
     * A {@code '%'} that does not start a complete, valid escape sequence is
     * copied through literally instead of raising an exception.
     *
     * @param s the text to unescape; must not be {@code null}
     * @return the decoded text
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String unescape(String s) {
        int len = s.length();
        StringBuilder out = new StringBuilder(len);
        int i = 0;
        while (i < len) {
            char ch = s.charAt(i);
            if (ch == '+') {
                // + : map to ' '
                out.append(' ');
                i++;
            } else if (ch != '%') {
                out.append(ch);
                i++;
            } else {
                boolean wide = i + 1 < len && s.charAt(i + 1) == 'u';
                int digitsStart = wide ? i + 2 : i + 1;
                int digitCount = wide ? 4 : 2;
                int decoded = parseHex(s, digitsStart, digitCount);
                if (decoded < 0) {
                    // Malformed or truncated escape: keep the '%' and let the
                    // following characters be handled as ordinary input.
                    out.append('%');
                    i++;
                } else {
                    out.append((char) decoded);
                    i = digitsStart + digitCount;
                }
            }
        }
        return out.toString();
    }

    /** Returns whether {@code ch} is copied unchanged by {@link #escape(String)}. */
    private static boolean isUnreserved(char ch) {
        if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z')
                || ('0' <= ch && ch <= '9')) {
            return true;
        }
        switch (ch) {
            case '-':
            case '_':
            case '.':
            case '!':
            case '~':
            case '*':
            case '\'':
            case '(':
            case '@':
            case ')':
                return true;
            default:
                return false;
        }
    }

    /** Appends the low eight bits of {@code value} as two upper-case hex digits. */
    private static void appendHexByte(StringBuilder out, int value) {
        out.append(HEX_DIGITS[(value >>> 4) & 0x0F]);
        out.append(HEX_DIGITS[value & 0x0F]);
    }

    /**
     * Parses {@code count} ASCII hex digits of {@code s} starting at
     * {@code start}; returns -1 if the range runs past the end of the string
     * or contains a character that is not an ASCII hex digit.
     */
    private static int parseHex(String s, int start, int count) {
        if (start + count > s.length()) {
            return -1;
        }
        int result = 0;
        for (int k = start; k < start + count; k++) {
            int digit = hexValue(s.charAt(k));
            if (digit < 0) {
                return -1;
            }
            result = (result << 4) | digit;
        }
        return result;
    }

    /** Returns the value of an ASCII hex digit (either case), or -1 if it is not one. */
    private static int hexValue(char c) {
        if ('0' <= c && c <= '9') {
            return c - '0';
        }
        if ('A' <= c && c <= 'F') {
            return c - 'A' + 10;
        }
        if ('a' <= c && c <= 'f') {
            return c - 'a' + 10;
        }
        return -1;
    }

}
分享到:
评论

相关推荐

    汉字转码实例

    汉字转码是计算机处理中文字符的关键技术之一,它涉及到字符编码、解码和转换等多个方面。在计算机系统中,汉字通常不能直接以图形形式存储,而是需要转化为特定的编码表示,以便于计算机处理和传输。这里我们将深入...

    汉字转码功能

    在Java或者Python等编程语言中,通常会有一个类库或函数来实现汉字转码。例如,在Java中,我们可以使用`java.nio.charset`包下的`CharsetDecoder`和`CharsetEncoder`类进行编码和解码。在Python中,可以使用内置的`...

    java读写excel包括utf8转码为可识别汉字gbk

    java读写excel包括utf8转码为可识别汉字gbk,jxl方式读取excel,生成一个新的excel

    java的api中文转码示例

    本文将深入探讨如何使用Java API进行中文转码,以方便URL传参和其他应用。 首先,中文字符在计算机中存储时通常采用Unicode编码,而URL参数通常要求ASCII编码,这就涉及到字符编码的转换。在Java中,我们可以使用`...

    java转码工具

    Java转码工具是一种基于Java语言开发的实用程序...总的来说,Java转码工具是处理编码问题的有效工具,尤其对于处理中文字符集的转换,它可以帮助开发者和普通用户在不同编码格式间轻松转换,确保数据的准确性和一致性。

    java转码代码

    在Java编程语言中,"转码"通常是指在不同字符编码之间进行转换的过程,以解决乱码问题。乱码是由于文件或数据流在处理时使用了错误的字符集导致的。在这个场景下,"java转码代码"指的是用于解决Java程序中遇到的乱码...

    java URL中文参数乱码处理

    js 中乱码处理方式 encodeURIComponent(encodeURIComponent(customerAddress...js到java encodeURI(url) String qijuType= new String(request.getParameter( ("qijuType")).getBytes("ISO-8859-1"), "utf-8");

    java实现阿拉伯数字转汉字数字

    "Java实现阿拉伯数字转汉字数字" Java是一种流行的编程语言,广泛应用于Android开发、Web开发、企业软件开发等领域。在开发过程中,经常需要将阿拉伯数字转换为汉字数字,以便于更好地与中文环境集成。在本文中,...

    MD5编码后转码,转码汉字一致.txt

    针对c#或者java,其他编程语言,对中文汉字加密出现了密文不一致问题进行修复。增加编码类型,自定义编码类型

    前后台字符集转码.txt

    根据提供的文件信息,本文将详细解析前后台字符集转码的相关知识点,包括字符集的基本概念、前后端如何处理字符集转码以及示例代码中的具体实现。 ### 字符集的基本概念 在计算机科学中,字符集(Character Set)...

    将字符串中的中文做UNICODE转码,非中文忽略

    将字符串中的中文做UNICODE转码,非中文忽略 简单实用 封装好了 直接用即可

    jsp中页面间传汉字参数转码的方法.docx

    在JavaServer Pages (JSP) 开发中,页面间的参数传递是常见的操作,尤其是在处理包含汉字的参数时,由于编码问题可能会导致乱码。本文主要介绍如何在JSP中正确地进行汉字参数的转码与解码,确保数据在页面间传递时...

    转码工具

    压缩包中的"convert.jar"很可能是一个Java编写的转码工具的可执行文件。Java的JAR(Java Archive)文件是包含了Java类和资源的归档文件,可以直接运行在支持Java的环境中。用户可以通过命令行或图形界面来调用这个...

    java实现文件下载

    在Java开发中实现文件下载功能是一项常见需求,尤其是在Web应用中。文件下载功能涉及到客户端与服务器端之间的交互,服务器需要将文件内容发送给客户端浏览器,然后由浏览器负责保存或打开该文件。Java Web开发中...

    java中文乱码之解决URL中文乱码问题的方法

    在Java开发中,遇到中文乱码问题是一种常见的挑战,特别是在处理URL时。URL中文乱码问题主要是由于URL编码和解码过程中的不一致导致的。下面将详细介绍如何解决这个问题,并探讨几种常用的方法。 首先,我们需要...

    关于JAVA字符编码:Unicode,ISO-8859-1,GBK,UTF-8编码及相互转换

    在Java中,GBK编码通常用于处理简体中文和繁体中文的数据。 #### 4. UTF-8 UTF-8是一种变长字符编码,它是Unicode的实现方式之一。UTF-8编码可以很好地支持世界上大多数语言的文字,而且它的编码方式使得英文字符的...

    中文转码工具.rar

    本篇文章将详细讲解中文转码的相关知识点,以及如何将UTF-8编码转换为ISO-8859-1编码。 首先,让我们了解两种编码体系:UTF-8和ISO-8859-1。 1. **UTF-8编码**:全称为Unicode Transformation Format - 8位,是一...

    地址栏传中文

    地址栏传输数据时,中文会变成乱码。我写了个小例子,用来解决这个问题。

    ascii编码转码工具

    然而,随着全球化的推进,ASCII编码无法满足对非英文字符(如中文、日文、阿拉伯文等)的需求,因此出现了各种扩展的字符编码,如Unicode。 Struts框架,是一个基于MVC设计模式的Java Web应用框架,它极大地简化了...

    简单的UTF-8与GBK之间相互转码工具

    GBK是基于中文的编码,包含了大量的汉字和其他语言字符;而UTF-8是一种可变长度的Unicode编码,能容纳世界上几乎所有的字符集。 GBK编码是中国大陆广泛使用的编码标准,它基于GB2312,增加了许多额外的字符。UTF-8...

Global site tag (gtag.js) - Google Analytics