- 浏览: 56922 次
- 性别:
- 来自: 深圳
-
文章分类
最新评论
了解了Java的字符集编码之后,Java又是怎样把内存中的字节转换成所需要的字符的呢?其实很简单,
Java中是通过StringCoding来完成字符转换的,它是一个内嵌类,现将源代码拷贝如下:
Code
public class StringCoding {
// Private constructor: no instances can be created from outside this class;
// every member visible here is static.
private StringCoding() {
}
/*
 * The cached coders for each thread. Each slot stores a SoftReference
 * wrapping the cached coder (see deref and set); decoder holds the
 * StringDecoder reused by decode(String, byte[], int, int).
 */
private static ThreadLocal decoder = new ThreadLocal();
private static ThreadLocal encoder = new ThreadLocal();
// True until warnUnsupportedCharset(String) has printed its warning once
private static boolean warnUnsupportedCharset = true;
// Unwraps the object cached in the given thread-local slot. Yields null
// when nothing has been stored for the current thread, or when the stored
// SoftReference no longer holds a referent.
//
private static Object deref(ThreadLocal tl) {
    SoftReference ref = (SoftReference) tl.get();
    return (ref != null) ? ref.get() : null;
}
// Stores ob in the given thread-local slot, wrapped in a SoftReference.
//
private static void set(ThreadLocal tl, Object ob) {
    SoftReference holder = new SoftReference(ob);
    tl.set(holder);
}
// Returns the first len bytes of ba. When ba already has exactly len
// elements the very same array is returned; otherwise a new array of
// length len is filled from the front of ba.
//
private static byte[] trim(byte[] ba, int len) {
    if (ba.length != len) {
        byte[] shortened = new byte[len];
        System.arraycopy(ba, 0, shortened, 0, len);
        return shortened;
    }
    return ba;
}
// Returns the first len chars of ca. When ca already has exactly len
// elements the very same array is returned; otherwise a new array of
// length len is filled from the front of ca.
//
private static char[] trim(char[] ca, int len) {
    if (ca.length != len) {
        char[] shortened = new char[len];
        System.arraycopy(ca, 0, shortened, 0, len);
        return shortened;
    }
    return ca;
}
// Multiplies len by expansionFactor and truncates the product to an int.
//
private static int scale(int len, float expansionFactor) {
    // Widen the factor to double before multiplying: with float
    // arithmetic the low order bits are lost once len exceeds 2**24.
    double scaled = len * (double) expansionFactor;
    return (int) scaled;
}
// Resolves a charset name to a Charset, or returns null when the name is
// not supported. An IllegalCharsetNameException raised by the lookup is
// not handled here and reaches the caller.
//
private static Charset lookupCharset(String csn) {
    if (!Charset.isSupported(csn))
        return null;
    try {
        return Charset.forName(csn);
    } catch (UnsupportedCharsetException x) {
        // isSupported() just said yes, so this is treated as fatal
        throw new Error(x);
    }
}
// Prints a warning that the default charset csn is unsupported and that
// ISO-8859-1 is used instead. The static flag of the same name is cleared
// afterwards, so later calls print nothing.
//
private static void warnUnsupportedCharset(String csn) {
    if (!warnUnsupportedCharset)
        return;
    // sun.misc.MessageUtils is used instead of the Logging API or
    // System.err because this can run during VM initialization, before
    // either of those is available.
    MessageUtils.err("WARNING: Default charset " + csn
            + " not supported, using ISO-8859-1 instead");
    warnUnsupportedCharset = false;
}
// -- Decoding --
// Common base of the two decoder flavours: one backed by a
// ByteToCharConverter, the other by a CharsetDecoder. It records the
// charset name that was originally requested.
//
private static abstract class StringDecoder {
    private final String requested;

    protected StringDecoder(String requestedCharsetName) {
        this.requested = requestedCharsetName;
    }

    // The name exactly as it was handed to the constructor
    final String requestedCharsetName() {
        return requested;
    }

    // The name reported by the underlying converter or charset
    abstract String charsetName();

    // Decodes len bytes of ba, starting at index off, into chars
    abstract char[] decode(byte[] ba, int off, int len);
}
// StringDecoder implementation that delegates to a ByteToCharConverter.
//
private static class ConverterSD extends StringDecoder {
    private ByteToCharConverter btc;

    private ConverterSD(ByteToCharConverter btc, String rcn) {
        super(rcn);
        this.btc = btc;
    }

    // Reports the encoding name of the wrapped converter
    String charsetName() {
        return btc.getCharacterEncoding();
    }

    // Decodes ba[off .. off+len) into a char array trimmed to the number
    // of chars produced.
    char[] decode(byte[] ba, int off, int len) {
        // Output buffer sized from the converter's max chars per byte
        int capacity = scale(len, btc.getMaxCharsPerByte());
        char[] out = new char[capacity];
        if (len == 0)
            return out;
        btc.reset();
        int produced;
        try {
            produced = btc.convert(ba, off, off + len, out, 0, capacity);
            produced += btc.flush(out, btc.nextCharIndex(), capacity);
        } catch (CharConversionException x) {
            // Long-standing behaviour: a conversion failure is not
            // reported; the output is cut at the converter's
            // nextCharIndex().
            produced = btc.nextCharIndex();
        }
        return trim(out, produced);
    }
}
// StringDecoder implementation that delegates to a CharsetDecoder
// configured to REPLACE malformed input and unmappable characters.
//
private static class CharsetSD extends StringDecoder {
    private final Charset cs;
    private final CharsetDecoder cd;

    private CharsetSD(Charset cs, String rcn) {
        super(rcn);
        this.cs = cs;
        CharsetDecoder dec = cs.newDecoder();
        dec = dec.onMalformedInput(CodingErrorAction.REPLACE);
        dec = dec.onUnmappableCharacter(CodingErrorAction.REPLACE);
        this.cd = dec;
    }

    // Historical name when the charset provides one, else its name()
    String charsetName() {
        return (cs instanceof HistoricallyNamedCharset)
                ? ((HistoricallyNamedCharset) cs).historicalName()
                : cs.name();
    }

    // Decodes ba[off .. off+len) into a char array trimmed to the number
    // of chars written.
    char[] decode(byte[] ba, int off, int len) {
        // Output buffer sized from the decoder's max chars per byte
        int capacity = scale(len, cd.maxCharsPerByte());
        char[] out = new char[capacity];
        if (len == 0)
            return out;
        cd.reset();
        ByteBuffer src = ByteBuffer.wrap(ba, off, len);
        CharBuffer dst = CharBuffer.wrap(out);
        try {
            CoderResult result = cd.decode(src, dst, true);
            if (!result.isUnderflow())
                result.throwException();
            result = cd.flush(dst);
            if (!result.isUnderflow())
                result.throwException();
        } catch (CharacterCodingException x) {
            // Substitution is always enabled, so this is not expected
            throw new Error(x);
        }
        return trim(out, dst.position());
    }
}
// Decodes ba[off .. off+len) using the named charset; a null name means
// "ISO-8859-1". The decoder cached for the current thread is reused when
// the name equals either its requested name or its reported charset name;
// otherwise a new decoder is created and cached in its place.
//
// Throws UnsupportedEncodingException, which is declared for the
// ByteToCharConverter fallback path.
//
static char[] decode(String charsetName, byte[] ba, int off, int len)
        throws UnsupportedEncodingException {
    StringDecoder sd = (StringDecoder) deref(decoder);
    String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
    boolean reusable = (sd != null)
            && (csn.equals(sd.requestedCharsetName())
                || csn.equals(sd.charsetName()));
    if (!reusable) {
        sd = null;
        try {
            Charset cs = lookupCharset(csn);
            if (cs != null)
                sd = new CharsetSD(cs, csn);
        } catch (IllegalCharsetNameException ignored) {
            // FALL THROUGH to ByteToCharConverter, for compatibility
        }
        if (sd == null)
            sd = new ConverterSD(ByteToCharConverter.getConverter(csn), csn);
        set(decoder, sd);
    }
    return sd.decode(ba, off, len);
}
// Decodes ba[off .. off+len) using the default encoding name reported by
// Converters. If that encoding is unsupported, the default name is reset,
// a warning is issued and ISO-8859-1 is used instead. If ISO-8859-1 is
// unavailable as well, an error is printed and the VM exits with status 1.
//
static char[] decode(byte[] ba, int off, int len) {
    String defaultName = Converters.getDefaultEncodingName();
    try {
        return decode(defaultName, ba, off, len);
    } catch (UnsupportedEncodingException unsupportedDefault) {
        Converters.resetDefaultEncodingName();
        warnUnsupportedCharset(defaultName);
        try {
            return decode("ISO-8859-1", ba, off, len);
        } catch (UnsupportedEncodingException missingLatin1) {
            // During VM initialization MessageUtils is the only way to
            // get any kind of error message out.
            MessageUtils.err("ISO-8859-1 charset not available: "
                    + missingLatin1.toString());
            // ISO-8859-1 is a required encoding; without it the
            // installation is seriously broken.
            System.exit(1);
            return null;
        }
    }
}
// -- Encoding --
// Common base of the two encoder flavours: one backed by a
// CharToByteConverter, the other by a CharsetEncoder. It records the
// charset name that was originally requested.
//
private static abstract class StringEncoder {
    private final String requested;

    protected StringEncoder(String requestedCharsetName) {
        this.requested = requestedCharsetName;
    }

    // The name exactly as it was handed to the constructor
    final String requestedCharsetName() {
        return requested;
    }

    // The name reported by the underlying converter or charset
    abstract String charsetName();

    // Encodes len chars of cs, starting at index off, into bytes
    abstract byte[] encode(char[] cs, int off, int len);
}
// StringEncoder implementation that delegates to a CharToByteConverter.
//
private static class ConverterSE extends StringEncoder {
    private CharToByteConverter ctb;

    private ConverterSE(CharToByteConverter ctb, String rcn) {
        super(rcn);
        this.ctb = ctb;
    }

    // Reports the encoding name of the wrapped converter
    String charsetName() {
        return ctb.getCharacterEncoding();
    }

    // Encodes ca[off .. off+len) into a byte array trimmed to the number
    // of bytes produced. A CharConversionException is rethrown as an Error.
    byte[] encode(char[] ca, int off, int len) {
        // Output buffer sized from the converter's max bytes per char
        int capacity = scale(len, ctb.getMaxBytesPerChar());
        byte[] out = new byte[capacity];
        if (len == 0)
            return out;
        ctb.reset();
        int produced;
        try {
            produced = ctb.convertAny(ca, off, (off + len), out, 0, capacity);
            produced += ctb.flushAny(out, ctb.nextByteIndex(), capacity);
        } catch (CharConversionException x) {
            throw new Error("Converter malfunction: "
                    + ctb.getClass().getName(), x);
        }
        return trim(out, produced);
    }
}
// A string encoder based upon a CharsetEncoder
//
private static class CharsetSE extends StringEncoder {
private Charset cs;
private CharsetEncoder ce;
private CharsetSE(Charset cs, String rcn) {
super(rcn);
this.cs = cs;
this.ce = cs.newEncoder().onMalformedInput(
CodingErrorAction.REPLACE).onUnmappableCharacter(
CodingErrorAction.REPLACE);
}
// Historical name when the charset provides one, else its name()
String charsetName() {
    return (cs instanceof HistoricallyNamedCharset)
            ? ((HistoricallyNamedCharset) cs).historicalName()
            : cs.name();
}
// Encodes ca[off .. off+len) into a byte array trimmed to the number of
// bytes written.
byte[] encode(char[] ca, int off, int len) {
    // Output buffer sized from the encoder's max bytes per char
    int capacity = scale(len, ce.maxBytesPerChar());
    byte[] out = new byte[capacity];
    if (len == 0)
        return out;
    ce.reset();
    CharBuffer src = CharBuffer.wrap(ca, off, len);
    ByteBuffer dst = ByteBuffer.wrap(out);
    try {
        CoderResult result = ce.encode(src, dst, true);
        if (!result.isUnderflow())
            result.throwException();
        result = ce.flush(dst);
        if (!result.isUnderflow())
            result.throwException();
    } catch (CharacterCodingException x) {
        // Substitution is always enabled, so this is not expected
        throw new Error(x);
    }
    return trim(out, dst.position());
}
Java中是通过StringCoding来完成字符转换的,它是一个内嵌类,现将源代码拷贝如下:
Code
public class StringCoding {
// Private constructor: no instances can be created from outside this class;
// every member visible here is static.
private StringCoding() {
}
/*
 * The cached coders for each thread. Each slot stores a SoftReference
 * wrapping the cached coder (see deref and set); decoder holds the
 * StringDecoder reused by decode(String, byte[], int, int).
 */
private static ThreadLocal decoder = new ThreadLocal();
private static ThreadLocal encoder = new ThreadLocal();
// True until warnUnsupportedCharset(String) has printed its warning once
private static boolean warnUnsupportedCharset = true;
// Unwraps the object cached in the given thread-local slot. Yields null
// when nothing has been stored for the current thread, or when the stored
// SoftReference no longer holds a referent.
//
private static Object deref(ThreadLocal tl) {
    SoftReference ref = (SoftReference) tl.get();
    return (ref != null) ? ref.get() : null;
}
// Stores ob in the given thread-local slot, wrapped in a SoftReference.
//
private static void set(ThreadLocal tl, Object ob) {
    SoftReference holder = new SoftReference(ob);
    tl.set(holder);
}
// Returns the first len bytes of ba. When ba already has exactly len
// elements the very same array is returned; otherwise a new array of
// length len is filled from the front of ba.
//
private static byte[] trim(byte[] ba, int len) {
    if (ba.length != len) {
        byte[] shortened = new byte[len];
        System.arraycopy(ba, 0, shortened, 0, len);
        return shortened;
    }
    return ba;
}
// Returns the first len chars of ca. When ca already has exactly len
// elements the very same array is returned; otherwise a new array of
// length len is filled from the front of ca.
//
private static char[] trim(char[] ca, int len) {
    if (ca.length != len) {
        char[] shortened = new char[len];
        System.arraycopy(ca, 0, shortened, 0, len);
        return shortened;
    }
    return ca;
}
// Multiplies len by expansionFactor and truncates the product to an int.
//
private static int scale(int len, float expansionFactor) {
    // Widen the factor to double before multiplying: with float
    // arithmetic the low order bits are lost once len exceeds 2**24.
    double scaled = len * (double) expansionFactor;
    return (int) scaled;
}
// Resolves a charset name to a Charset, or returns null when the name is
// not supported. An IllegalCharsetNameException raised by the lookup is
// not handled here and reaches the caller.
//
private static Charset lookupCharset(String csn) {
    if (!Charset.isSupported(csn))
        return null;
    try {
        return Charset.forName(csn);
    } catch (UnsupportedCharsetException x) {
        // isSupported() just said yes, so this is treated as fatal
        throw new Error(x);
    }
}
// Prints a warning that the default charset csn is unsupported and that
// ISO-8859-1 is used instead. The static flag of the same name is cleared
// afterwards, so later calls print nothing.
//
private static void warnUnsupportedCharset(String csn) {
    if (!warnUnsupportedCharset)
        return;
    // sun.misc.MessageUtils is used instead of the Logging API or
    // System.err because this can run during VM initialization, before
    // either of those is available.
    MessageUtils.err("WARNING: Default charset " + csn
            + " not supported, using ISO-8859-1 instead");
    warnUnsupportedCharset = false;
}
// -- Decoding --
// Common base of the two decoder flavours: one backed by a
// ByteToCharConverter, the other by a CharsetDecoder. It records the
// charset name that was originally requested.
//
private static abstract class StringDecoder {
    private final String requested;

    protected StringDecoder(String requestedCharsetName) {
        this.requested = requestedCharsetName;
    }

    // The name exactly as it was handed to the constructor
    final String requestedCharsetName() {
        return requested;
    }

    // The name reported by the underlying converter or charset
    abstract String charsetName();

    // Decodes len bytes of ba, starting at index off, into chars
    abstract char[] decode(byte[] ba, int off, int len);
}
// StringDecoder implementation that delegates to a ByteToCharConverter.
//
private static class ConverterSD extends StringDecoder {
    private ByteToCharConverter btc;

    private ConverterSD(ByteToCharConverter btc, String rcn) {
        super(rcn);
        this.btc = btc;
    }

    // Reports the encoding name of the wrapped converter
    String charsetName() {
        return btc.getCharacterEncoding();
    }

    // Decodes ba[off .. off+len) into a char array trimmed to the number
    // of chars produced.
    char[] decode(byte[] ba, int off, int len) {
        // Output buffer sized from the converter's max chars per byte
        int capacity = scale(len, btc.getMaxCharsPerByte());
        char[] out = new char[capacity];
        if (len == 0)
            return out;
        btc.reset();
        int produced;
        try {
            produced = btc.convert(ba, off, off + len, out, 0, capacity);
            produced += btc.flush(out, btc.nextCharIndex(), capacity);
        } catch (CharConversionException x) {
            // Long-standing behaviour: a conversion failure is not
            // reported; the output is cut at the converter's
            // nextCharIndex().
            produced = btc.nextCharIndex();
        }
        return trim(out, produced);
    }
}
// StringDecoder implementation that delegates to a CharsetDecoder
// configured to REPLACE malformed input and unmappable characters.
//
private static class CharsetSD extends StringDecoder {
    private final Charset cs;
    private final CharsetDecoder cd;

    private CharsetSD(Charset cs, String rcn) {
        super(rcn);
        this.cs = cs;
        CharsetDecoder dec = cs.newDecoder();
        dec = dec.onMalformedInput(CodingErrorAction.REPLACE);
        dec = dec.onUnmappableCharacter(CodingErrorAction.REPLACE);
        this.cd = dec;
    }

    // Historical name when the charset provides one, else its name()
    String charsetName() {
        return (cs instanceof HistoricallyNamedCharset)
                ? ((HistoricallyNamedCharset) cs).historicalName()
                : cs.name();
    }

    // Decodes ba[off .. off+len) into a char array trimmed to the number
    // of chars written.
    char[] decode(byte[] ba, int off, int len) {
        // Output buffer sized from the decoder's max chars per byte
        int capacity = scale(len, cd.maxCharsPerByte());
        char[] out = new char[capacity];
        if (len == 0)
            return out;
        cd.reset();
        ByteBuffer src = ByteBuffer.wrap(ba, off, len);
        CharBuffer dst = CharBuffer.wrap(out);
        try {
            CoderResult result = cd.decode(src, dst, true);
            if (!result.isUnderflow())
                result.throwException();
            result = cd.flush(dst);
            if (!result.isUnderflow())
                result.throwException();
        } catch (CharacterCodingException x) {
            // Substitution is always enabled, so this is not expected
            throw new Error(x);
        }
        return trim(out, dst.position());
    }
}
// Decodes ba[off .. off+len) using the named charset; a null name means
// "ISO-8859-1". The decoder cached for the current thread is reused when
// the name equals either its requested name or its reported charset name;
// otherwise a new decoder is created and cached in its place.
//
// Throws UnsupportedEncodingException, which is declared for the
// ByteToCharConverter fallback path.
//
static char[] decode(String charsetName, byte[] ba, int off, int len)
        throws UnsupportedEncodingException {
    StringDecoder sd = (StringDecoder) deref(decoder);
    String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
    boolean reusable = (sd != null)
            && (csn.equals(sd.requestedCharsetName())
                || csn.equals(sd.charsetName()));
    if (!reusable) {
        sd = null;
        try {
            Charset cs = lookupCharset(csn);
            if (cs != null)
                sd = new CharsetSD(cs, csn);
        } catch (IllegalCharsetNameException ignored) {
            // FALL THROUGH to ByteToCharConverter, for compatibility
        }
        if (sd == null)
            sd = new ConverterSD(ByteToCharConverter.getConverter(csn), csn);
        set(decoder, sd);
    }
    return sd.decode(ba, off, len);
}
// Decodes ba[off .. off+len) using the default encoding name reported by
// Converters. If that encoding is unsupported, the default name is reset,
// a warning is issued and ISO-8859-1 is used instead. If ISO-8859-1 is
// unavailable as well, an error is printed and the VM exits with status 1.
//
static char[] decode(byte[] ba, int off, int len) {
    String defaultName = Converters.getDefaultEncodingName();
    try {
        return decode(defaultName, ba, off, len);
    } catch (UnsupportedEncodingException unsupportedDefault) {
        Converters.resetDefaultEncodingName();
        warnUnsupportedCharset(defaultName);
        try {
            return decode("ISO-8859-1", ba, off, len);
        } catch (UnsupportedEncodingException missingLatin1) {
            // During VM initialization MessageUtils is the only way to
            // get any kind of error message out.
            MessageUtils.err("ISO-8859-1 charset not available: "
                    + missingLatin1.toString());
            // ISO-8859-1 is a required encoding; without it the
            // installation is seriously broken.
            System.exit(1);
            return null;
        }
    }
}
// -- Encoding --
// Common base of the two encoder flavours: one backed by a
// CharToByteConverter, the other by a CharsetEncoder. It records the
// charset name that was originally requested.
//
private static abstract class StringEncoder {
    private final String requested;

    protected StringEncoder(String requestedCharsetName) {
        this.requested = requestedCharsetName;
    }

    // The name exactly as it was handed to the constructor
    final String requestedCharsetName() {
        return requested;
    }

    // The name reported by the underlying converter or charset
    abstract String charsetName();

    // Encodes len chars of cs, starting at index off, into bytes
    abstract byte[] encode(char[] cs, int off, int len);
}
// StringEncoder implementation that delegates to a CharToByteConverter.
//
private static class ConverterSE extends StringEncoder {
    private CharToByteConverter ctb;

    private ConverterSE(CharToByteConverter ctb, String rcn) {
        super(rcn);
        this.ctb = ctb;
    }

    // Reports the encoding name of the wrapped converter
    String charsetName() {
        return ctb.getCharacterEncoding();
    }

    // Encodes ca[off .. off+len) into a byte array trimmed to the number
    // of bytes produced. A CharConversionException is rethrown as an Error.
    byte[] encode(char[] ca, int off, int len) {
        // Output buffer sized from the converter's max bytes per char
        int capacity = scale(len, ctb.getMaxBytesPerChar());
        byte[] out = new byte[capacity];
        if (len == 0)
            return out;
        ctb.reset();
        int produced;
        try {
            produced = ctb.convertAny(ca, off, (off + len), out, 0, capacity);
            produced += ctb.flushAny(out, ctb.nextByteIndex(), capacity);
        } catch (CharConversionException x) {
            throw new Error("Converter malfunction: "
                    + ctb.getClass().getName(), x);
        }
        return trim(out, produced);
    }
}
// A string encoder based upon a CharsetEncoder
//
private static class CharsetSE extends StringEncoder {
private Charset cs;
private CharsetEncoder ce;
private CharsetSE(Charset cs, String rcn) {
super(rcn);
this.cs = cs;
this.ce = cs.newEncoder().onMalformedInput(
CodingErrorAction.REPLACE).onUnmappableCharacter(
CodingErrorAction.REPLACE);
}
// Historical name when the charset provides one, else its name()
String charsetName() {
    return (cs instanceof HistoricallyNamedCharset)
            ? ((HistoricallyNamedCharset) cs).historicalName()
            : cs.name();
}
// Encodes ca[off .. off+len) into a byte array trimmed to the number of
// bytes written.
byte[] encode(char[] ca, int off, int len) {
    // Output buffer sized from the encoder's max bytes per char
    int capacity = scale(len, ce.maxBytesPerChar());
    byte[] out = new byte[capacity];
    if (len == 0)
        return out;
    ce.reset();
    CharBuffer src = CharBuffer.wrap(ca, off, len);
    ByteBuffer dst = ByteBuffer.wrap(out);
    try {
        CoderResult result = ce.encode(src, dst, true);
        if (!result.isUnderflow())
            result.throwException();
        result = ce.flush(dst);
        if (!result.isUnderflow())
            result.throwException();
    } catch (CharacterCodingException x) {
        // Substitution is always enabled, so this is not expected
        throw new Error(x);
    }
    return trim(out, dst.position());
}
发表评论
-
高精度
2011-08-31 20:40 1000之前提到过在商业运算中要使用BigDecimal来进行相关的钱 ... -
java中如何进行高精度运算
2011-08-31 20:32 795import java.math.BigDecimal; im ... -
win7下安装vs2008
2011-01-09 23:28 848在win7下安装VS2008有点不太让人愉快,不过白痴还是找到 ... -
设置响应头字段的些许应用
2010-12-11 13:04 1058禁止浏览器缓存当前文档内容 只要增加如下的响应头字段: r ... -
正则表达式
2010-12-11 12:39 581整理资料如下: \\ 反斜杠 \t 间隔 ('\u0009' ... -
应该懂的基础问题
2010-12-11 12:14 695问题一:我声明了什么! String s = &quo ... -
Java中的堆栈
2010-12-11 10:59 651Java把内存划分成两种:一种是栈内存,一种是堆内存。 ... -
Java IO 小结
2010-12-11 10:51 602什么是数据流 ----------数据流是指所有的数据通信通道 ... -
字符转换
2010-11-30 20:43 638问:怎样将GBK 编码的汉字转换成为 UTF-8 编码的汉 ... -
进制转换
2010-11-30 20:42 795计算机操作里面时常要牵涉到二进制、十进制、十六进制等。特别是在 ... -
多叉树解决复杂表头问题
2010-11-30 20:21 785现代化WEB项目中少不了要用到报表展现.对于中国式报表来说,复 ... -
关于java编码的转换
2010-11-30 20:17 576问:怎样将GBK 编码的汉字转换成为 UTF-8 编码的汉 ... -
编译器报:未调用原型函数(是有意用变量定义的吗?)(本文为原创,转载请注明出处)
2010-11-30 20:08 1817MSDN解释:编译器检测到未使用的函数原型。如果有意将该原型作 ... -
C++类型转换运算符重载
2010-11-30 20:06 850在C++中有一个特殊的运算符重载方法--类型转换运算符重载,形 ... -
ATL分解字符串
2010-11-30 20:05 753一个简单的分解字符串的方法: Code vector< ... -
java字符集(三)之java.io
2010-11-30 20:04 926java IO 的输出分两种,字节流: InputStream ... -
编码字符集与字符集编码的区别(转)
2010-11-30 20:03 652需要再一次强调的是,无论历史上的UCS还是现如今的Unicod ... -
Unicode与UCS的历史恩怨(转载)
2010-11-30 19:59 550ASCII及相关标准 ... -
Java字符集(一)
2010-11-30 19:57 659UNICODE,GBK和BIG5就是编码的值,而utf-8,u ...
相关推荐
### Java字符集编码乱码详解 #### 一、编码与乱码基础知识 在计算机科学领域,字符集(Character Set)是指一系列符号和电子通信代码的标准集合。每种字符集都有其特定的应用场景和优势。例如,ASCII(American ...
### Java字符集和编码 #### 一、引言 在探讨Java字符集和编码之前,我们先了解一下为什么在Java编程中需要关注字符集和编码。Java作为一种广泛应用的编程语言,其内部采用的是Unicode编码,这使得Java能够很好地...
### Java支持的字符集 Java作为一种广泛使用的编程语言,在处理多语言环境下的文本时,其对字符集的支持显得尤为重要。本文将详细介绍Java所支持的基本字符集(Basic Encoding Set)和扩展字符集(Extended ...
### Java字符集详解 #### 一、概述与背景 本文主要探讨了字符编码的基本概念以及Java编程语言如何处理不同字符集。随着信息技术的发展,字符编码技术也在不断演进,以支持全球范围内各种语言的文本表示需求。文章...
本文将围绕“Java字符集编码简记”这一主题,深入探讨相关知识点,并结合标签“源码”和“工具”,探讨在实际开发中如何运用和处理字符编码问题。 首先,我们需要理解字符集的概念。字符集是一系列符号的集合,例如...
### Java字符集编码问题详解 #### 一、引言 在Java编程中,字符集编码问题是一个常见且重要的议题。由于不同的系统、平台以及网络环境中可能存在多种字符编码格式,这导致了在处理文本数据时可能会遇到编码不一致...
JAVA及相关字符集编码问题 在深入探讨JAVA与字符集编码问题之前,我们首先需要理解不同字符集编码的基本概念以及它们在JAVA环境中的应用。字符集编码是计算机系统中表示文字的一种方式,它决定了如何将字符转换为二...
在Java编程语言中,字符集(Charset)是用于表示文本数据的一系列规则,它定义了字符与二进制数据之间的映射关系。...以上就是关于Java字符集解码方法的详细说明,希望对您理解Java字符集处理有所帮助。
java 字符集编码转换,时间格式化,数字判断等,java文件
在Java开发中,连接Oracle数据库是一项常见的任务,尤其是在处理特定字符集如American ASCII7时,开发者需要对字符编码有深入的理解。Oracle数据库支持多种字符集,包括ASCII,它是最基础的7位字符集,包含32个控制...
Java中的字符集是一个重要的概念,尤其对于处理多语言文本或者跨平台的数据交换至关重要。Java语言内部使用Unicode编码,具体来说是UTF-16格式,这意味着每个`char`类型变量能够表示一个Unicode字符,通常占据两个...
### Java中的Unicode与字符集详解 #### 一、引言 在软件开发过程中,正确处理文本数据至关重要。尤其是在全球化日益加深的今天,软件不仅要能够处理英语等常见的西方语言,还要支持世界各地的语言,包括中文、日文...
Java字符集是一个涵盖编码基础知识、Java编程环境与字符编码关系以及不同编码标准如何在Java中应用的主题。在本文中,我们将深入探讨这些方面,以便更好地理解Java如何处理各种字符编码。 首先,我们要明白编码的...
在Java编程中,正确地处理文件的字符集编码至关重要,特别是在读取或写入含有非ASCII字符(如中文、日文、韩文等)的文件时。`cpdetector`是Java中一个常用的库,用于自动检测文件的字符集编码。这个库能够帮助...
二、字符集 字符集是表示字符的规则集合,Java使用Unicode字符集。常见的字符集有ASCII、GBK、UTF-8等。在Java中,字符流处理涉及字符集转换: 1. InputStreamReader和OutputStreamWriter 这两个类在字节流和字符流...
Java字符集基础知识与问题 字符集编码是计算机处理文本数据的基础,不同的编码方式适用于不同的应用场景。本文主要讨论编码的基本知识,特别是与Java相关的部分,包括ISO8859-1、GB2312/GBK、Unicode以及UTF编码。...
Java字符集处理涉及编码转换、文件读写、网络通信和数据库操作等多个方面。理解和正确使用字符集是避免乱码问题的关键。在处理多语言内容时,推荐使用Unicode(如UTF-8)编码,因为它具有广泛兼容性和可扩展性。在...
Java字符集是Java编程语言中处理字符编码的基础概念,它对于理解如何在程序中正确地存储、处理和传输文本至关重要。在Java中,字符集主要指的是Unicode字符集,特别是其子集UTF-8,它是Java默认使用的字符编码。Java...
Java字符集是编程中至关重要的概念,涉及到计算机中字符的表示和中文字符的编码问题。在Java中,字符集是用来定义字符的二进制代码集合,它允许程序处理各种语言的文字。 首先,我们来理解计算机中字符的表示。...