/// <summary>
/// Hand-rolled conversion between .NET strings and a UTF-8-like byte layout.
/// </summary>
/// <remarks>
/// The layout differs from standard UTF-8 in ways that are visible in the code below:
/// every UTF-16 code unit is encoded on its own (one, two or three bytes), U+0000 is
/// written as the two bytes 0xC0 0x80 instead of a single zero byte, and four-byte
/// sequences are neither produced nor accepted. Use <c>System.Text.Encoding.UTF8</c>
/// when standard UTF-8 is required.
/// </remarks>
class MyEncoding
{
    /// <summary>
    /// Decodes a buffer of one-, two- and three-byte sequences into a string.
    /// </summary>
    /// <param name="buffer">The encoded bytes; the whole array is decoded.</param>
    /// <returns>The decoded string (empty when <paramref name="buffer"/> is empty).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
    /// <exception cref="IOException">
    /// A sequence is truncated, a continuation byte is not of the form 10xxxxxx, or a
    /// lead byte is of the form 10xxxxxx or 1111xxxx.
    /// </exception>
    public static string EncodeUtf8ByteToString(byte[] buffer)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException("buffer");
        }

        int utfLength = buffer.Length;

        // Every sequence yields exactly one char, so the output can never be longer
        // than the input.
        char[] result = new char[utfLength];
        int count = 0;
        int index = 0;

        while (count < utfLength)
        {
            byte a = buffer[count++];

            if (a < 0x80)
            {
                // 0xxxxxxx
                result[index++] = (char)a;
            }
            else if ((a & 0xE0) == 0xC0)
            {
                // 110xxxxx 10xxxxxx
                if (count >= utfLength)
                {
                    throw new IOException("Invalid UTF-8 encoding found, start of two byte char found at end.");
                }

                byte b = buffer[count++];
                if ((b & 0xC0) != 0x80)
                {
                    throw new IOException("Invalid UTF-8 encoding found, byte two does not start with 0x80.");
                }

                result[index++] = (char)(((a & 0x1F) << 6) | (b & 0x3F));
            }
            else if ((a & 0xF0) == 0xE0)
            {
                // 1110xxxx 10xxxxxx 10xxxxxx
                if (count + 1 >= utfLength)
                {
                    throw new IOException("Invalid UTF-8 encoding found, start of three byte char found at end.");
                }

                byte b = buffer[count++];
                byte c = buffer[count++];
                if (((b & 0xC0) != 0x80) || ((c & 0xC0) != 0x80))
                {
                    throw new IOException("Invalid UTF-8 encoding found, byte two does not start with 0x80.");
                }

                result[index++] = (char)(((a & 0x0F) << 12) | ((b & 0x3F) << 6) | (c & 0x3F));
            }
            else
            {
                // 10xxxxxx or 1111xxxx as a lead byte
                throw new IOException("Invalid UTF-8 encoding found, aborting.");
            }
        }

        return new string(result, 0, index);
    }

    /// <summary>
    /// Decodes a buffer of one-, two- and three-byte sequences into a string.
    /// Accepts the same input as <see cref="EncodeUtf8ByteToString"/> but reports every
    /// malformed input with the same generic message.
    /// </summary>
    /// <param name="bytearr">The encoded bytes; the whole array is decoded.</param>
    /// <returns>The decoded string (empty when <paramref name="bytearr"/> is empty).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytearr"/> is null.</exception>
    /// <exception cref="IOException">The input is truncated or malformed.</exception>
    public static String ReadString(byte[] bytearr)
    {
        if (bytearr == null)
        {
            throw new ArgumentNullException("bytearr");
        }

        int utflen = bytearr.Length;

        // The number of chars produced may be less than utflen.
        StringBuilder str = new StringBuilder(utflen);
        int count = 0;

        while (count < utflen)
        {
            int c = bytearr[count];
            int char2;
            int char3;

            // The high nibble of the lead byte selects the sequence length.
            switch (c >> 4)
            {
                case 0:
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7:
                    /* 0xxxxxxx */
                    count++;
                    str.Append((char)c);
                    break;

                case 12:
                case 13:
                    /* 110x xxxx 10xx xxxx */
                    count += 2;
                    if (count > utflen)
                    {
                        throw new IOException("Invalid UTF-8 encoding found, aborting.");
                    }

                    char2 = bytearr[count - 1];
                    if ((char2 & 0xC0) != 0x80)
                    {
                        throw new IOException("Invalid UTF-8 encoding found, aborting.");
                    }

                    str.Append((char)(((c & 0x1F) << 6) | (char2 & 0x3F)));
                    break;

                case 14:
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    count += 3;
                    if (count > utflen)
                    {
                        throw new IOException("Invalid UTF-8 encoding found, aborting.");
                    }

                    char2 = bytearr[count - 2];
                    char3 = bytearr[count - 1];
                    if (((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80))
                    {
                        throw new IOException("Invalid UTF-8 encoding found, aborting.");
                    }

                    str.Append((char)(((c & 0x0F) << 12) | ((char2 & 0x3F) << 6) | (char3 & 0x3F)));
                    break;

                default:
                    /* 10xx xxxx, 1111 xxxx */
                    throw new IOException("Invalid UTF-8 encoding found, aborting.");
            }
        }

        return str.ToString();
    }

    /// <summary>
    /// Encodes a string into the byte layout understood by
    /// <see cref="EncodeUtf8ByteToString"/> and <see cref="ReadString"/>.
    /// </summary>
    /// <param name="text">The text to encode; may be null.</param>
    /// <returns>The encoded bytes, or an empty array when <paramref name="text"/> is null.</returns>
    /// <exception cref="IOException">The encoded form would exceed <c>int.MaxValue</c> bytes.</exception>
    public static byte[] EncodeStringToUtf8Byte(string text)
    {
        if (text == null)
        {
            return new byte[0];
        }

        char[] charr = text.ToCharArray();
        uint utfLength = CountUtf8Bytes(charr);

        if (utfLength > int.MaxValue)
        {
            throw new IOException(
                String.Format(
                    "Cannot marshall an encoded string longer than: {0} bytes, supplied " +
                    "string requires: {1} bytes to encode",
                    int.MaxValue,
                    utfLength));
        }

        byte[] bytearr = new byte[utfLength];
        EncodeUTF8toBuffer(charr, bytearr);
        return bytearr;
    }

    /// <summary>
    /// Returns the number of bytes <see cref="EncodeUTF8toBuffer"/> writes for
    /// <paramref name="chars"/>.
    /// </summary>
    private static uint CountUtf8Bytes(char[] chars)
    {
        uint utfLength = 0;

        for (int i = 0; i < chars.Length; i++)
        {
            int c = chars[i];

            if ((c >= 0x0001) && (c <= 0x007F))
            {
                utfLength++;
            }
            else if (c > 0x07FF)
            {
                utfLength += 3;
            }
            else
            {
                // U+0000 and U+0080..U+07FF take two bytes.
                utfLength += 2;
            }
        }

        return utfLength;
    }

    /// <summary>
    /// Writes the encoded form of <paramref name="chars"/> into <paramref name="buffer"/>,
    /// starting at index 0. The buffer must hold at least
    /// <see cref="CountUtf8Bytes"/> bytes.
    /// </summary>
    private static void EncodeUTF8toBuffer(char[] chars, byte[] buffer)
    {
        int count = 0;

        for (int i = 0; i < chars.Length; i++)
        {
            int c = chars[i];

            if ((c >= 0x0001) && (c <= 0x007F))
            {
                buffer[count++] = (byte)c;
            }
            else if (c > 0x07FF)
            {
                buffer[count++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
                buffer[count++] = (byte)(0x80 | ((c >> 6) & 0x3F));
                buffer[count++] = (byte)(0x80 | (c & 0x3F));
            }
            else
            {
                // U+0000 lands here and becomes 0xC0 0x80.
                buffer[count++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
                buffer[count++] = (byte)(0x80 | (c & 0x3F));
            }
        }
    }
}
Endian helper:
/// <summary>
/// Byte-order reversal helpers for the primitive numeric types and for raw byte arrays.
/// Each overload returns the value whose bytes are in the opposite order to the input.
/// </summary>
public class EndianSupport
{
    /// <summary>Swaps the two bytes of a UTF-16 code unit.</summary>
    public static char SwitchEndian(char x)
    {
        // The operand is promoted to int and the result stays within 0..0xFFFF,
        // so the cast back to char cannot overflow.
        return (char)(((x & 0xFF) << 8) | ((x >> 8) & 0xFF));
    }

    /// <summary>Swaps the two bytes of a signed 16-bit value.</summary>
    public static short SwitchEndian(short x)
    {
        // The swapped pattern may have its top bit set (e.g. 0x00FF -> 0xFF00), which
        // does not fit a positive short. unchecked keeps the bit pattern instead of
        // throwing OverflowException when the project is compiled with overflow checks.
        return unchecked((short)(((x & 0xFF) << 8) | ((x >> 8) & 0xFF)));
    }

    /// <summary>Reverses the four bytes of a signed 32-bit value.</summary>
    public static int SwitchEndian(int x)
    {
        // Masking after the arithmetic shift discards the sign-extension bits.
        return ((x & 0xFF) << 24)
             | (((x >> 8) & 0xFF) << 16)
             | (((x >> 16) & 0xFF) << 8)
             | ((x >> 24) & 0xFF);
    }

    /// <summary>Reverses the eight bytes of a signed 64-bit value.</summary>
    public static long SwitchEndian(long x)
    {
        return ((x & 0xFFL) << 56)
             | (((x >> 8) & 0xFFL) << 48)
             | (((x >> 16) & 0xFFL) << 40)
             | (((x >> 24) & 0xFFL) << 32)
             | (((x >> 32) & 0xFFL) << 24)
             | (((x >> 40) & 0xFFL) << 16)
             | (((x >> 48) & 0xFFL) << 8)
             | ((x >> 56) & 0xFFL);
    }

    /// <summary>Swaps the two bytes of an unsigned 16-bit value.</summary>
    public static ushort SwitchEndian(ushort x)
    {
        return (ushort)(((x & 0xFF) << 8) | ((x >> 8) & 0xFF));
    }

    /// <summary>Reverses the four bytes of an unsigned 32-bit value.</summary>
    public static uint SwitchEndian(uint x)
    {
        return ((x & 0xFFu) << 24)
             | (((x >> 8) & 0xFFu) << 16)
             | (((x >> 16) & 0xFFu) << 8)
             | ((x >> 24) & 0xFFu);
    }

    /// <summary>Reverses the eight bytes of an unsigned 64-bit value.</summary>
    public static ulong SwitchEndian(ulong x)
    {
        return ((x & 0xFFUL) << 56)
             | (((x >> 8) & 0xFFUL) << 48)
             | (((x >> 16) & 0xFFUL) << 40)
             | (((x >> 24) & 0xFFUL) << 32)
             | (((x >> 32) & 0xFFUL) << 24)
             | (((x >> 40) & 0xFFUL) << 16)
             | (((x >> 48) & 0xFFUL) << 8)
             | ((x >> 56) & 0xFFUL);
    }

    /// <summary>
    /// Reverses the eight bytes of the binary representation of a double.
    /// </summary>
    public static double SwitchEndian(double x)
    {
        // GetBytes and ToDouble use the same (platform) byte order, so reversing in
        // between yields the byte-swapped value without any stream objects.
        byte[] raw = BitConverter.GetBytes(x);
        Array.Reverse(raw);
        return BitConverter.ToDouble(raw, 0);
    }

    /// <summary>
    /// Reverses the four bytes of the binary representation of a float.
    /// </summary>
    public static float SwitchEndian(float x)
    {
        byte[] raw = BitConverter.GetBytes(x);
        Array.Reverse(raw);
        return BitConverter.ToSingle(raw, 0);
    }

    /// <summary>
    /// Returns a new array holding the bytes of <paramref name="x"/> in reverse order.
    /// The input array is left unmodified.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="x"/> is null.</exception>
    public static byte[] SwitchEndian(byte[] x)
    {
        if (x == null)
        {
            throw new ArgumentNullException("x");
        }

        byte[] rc = (byte[])x.Clone();
        Array.Reverse(rc);
        return rc;
    }
}
本文将深入探讨“常用中文编码显示及转换”的主题,包括汉字的页面编码、URL编码,以及在GBK和UTF-8之间的转换方法。同时,我们还将分析提供的源码文件,以了解其实现细节。 首先,我们要明白汉字的页面编码。页面...
`ToCharArray()`方法可以将字符串转换为字符数组,而`Encoding`类如`Encoding.UTF8.GetBytes()`可以将字符串转换为字节数组,`Encoding.UTF8.GetString()`则用于反向转换。 **各种数值类型和字节数组之间的转换** ...
- **解码**:相反,将`bytes`转换回`str`,使用`decode`方法,同样需指定解码格式:`website_string = website_bytes_utf8.decode()`。若不指定编码,Python通常会使用默认的UTF-8进行解码。 4. **不同编码间的...
byte[] bytes = data.getBytes("UTF-8"); ``` 3. **接收响应**:在读取服务器返回的响应时,也要注意解码过程。使用`InputStreamReader`和`BufferedReader`时,需指定正确的字符集: ```java InputStream ...
然而,网络上流传的一种通过GBK转换UTF-8然后再转换回来的方法可能存在隐患: ```java String str = "汉字测试"; String str1 = new String(str.getBytes("UTF-8"), "GBK"); String str2 = new String(str1....
一种常见的方法是检查HTTP响应头中的"Content-Type"字段,其中可能包含了字符编码信息,如`charset=utf-8`。如果没有明确的编码信息,可以通过搜索常见的BOM(Byte Order Mark)或者查看网页中特定字符的编码来判断...
3. **String类与字符转换**:`String`类提供了许多方法来处理字符,如`getBytes()`用于将字符串转换为字节数组,`new String(byte[], charset)`则可以将字节数组转换回字符串,指定的charset确保了正确的字符编码。...
String resp = new String(respBuffer, Charset.forName("UTF-8")); return resp; } } ``` 在这个示例中,`post()`方法接收字节数组和Content-Type作为参数,创建一个`PostMethod`对象并设置请求头。`...
例如,将UTF-8编码的字符串转换为GBK编码:`new String(str.getBytes("UTF-8"), "GBK")`。 五、Swing和AWT界面组件 在GUI界面中,组件的字体设置和文本渲染可能涉及编码问题。需确保字体支持中文,且在设置文本时...
- 文件编码格式设为UTF-8,注意UTF-8文件可能包含BOM(Byte Order Mark),这在某些情况下(如使用session)会导致问题。可以使用支持去除BOM的编辑器如EditPlus进行文件保存,并选择去除BOM选项。 ##### 2. 字符...
例如,从GBK编码的字节数组转换为UTF-8编码的字节数组,而非将一个`String`从GBK“转换”为UTF-8。这种转换通常发生在数据需要以字节形式在网络或磁盘上进行传输时。 在编写JSP页面时,常常会在顶部声明`...
例如,接收到请求后,我们需要确定请求头中的Content-Type是否指定了正确的字符集,然后使用`new String(byte[], charset)`将字节数组转换为字符串。如果网页编码为UTF-8,那么在服务器端也需要确保处理请求时使用的...
- **字符编码转换**:使用`new String(byte[], charsetName)`构造方法可以将字节数组按照指定的字符集转换为字符串。 5. **I/O流与编码** - **InputStreamReader与OutputStreamWriter**:这些类用于在字节流和...
- **请求编码**:Struts2默认使用ISO-8859-1编码,如果上传文件名包含中文,需在Action中手动将文件名转换为UTF-8,例如使用`new String(file.getName().getBytes("ISO-8859-1"), "UTF-8")`。 - **文件存储**:...
String param = new String(request.getParameter("paramName").getBytes("ISO-8859-1"), "UTF-8"); ``` 综上所述,Java开发中的中文处理问题主要涉及字符编码的转换和设置,以及与数据库的兼容性。通过理解Unicode...
byte[] responseHeadBuffer = utf8.GetBytes(responseHead); ``` - 构建HTTP响应的状态行。 - 将HTML响应体转换为字节数组。 - 构建HTTP响应头,包括Content-Type和Content-Length。 6. **发送响应**: ```...
数据通常是字节数组,因此在发送字符串时需要进行编码(如UTF8)并将字符串转换为字节,反之亦然。 例如,服务器端代码可能包含如下部分: ```vbnet Dim serverSocket As New Socket(AddressFamily.InterNetwork, ...
可以使用 Java 的 String 类的 getBytes() 方法将字符串转换为 byte 数组,然后使用新的编码方式重新构建字符串。例如: ```java String str = "中文字符串"; byte[] bytes = str.getBytes("utf-8"); String newStr...
String text = new String(b, "UTF-8"); // 或者直接转换 String text2 = new String(str.getBytes("ISO-8859-1"), "UTF-8"); ``` 2. **方法二:设置请求编码** - **适用场景**:适用于HTTP请求中的POST请求...
- 使用`String.getBytes()`和`new String(byte[], charset)`时,需明确指定字符集,避免使用默认编码。 8. **AndroidManifest.xml配置**: - 在AndroidManifest.xml中,可以设置`<application>`标签的`android:...