Java处理UTF-8带BOM的文本的读写

liufei.fir

浏览: 693644 次
性别:
来自: 上海

最近访客更多访客>>

性感迷人可爱又温柔的马云

dragonsky_w

清香白莲素还真

manyu042418

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

什么是BOM

BOM（byte-order mark），即字节顺序标记，它是插入到以UTF-8、UTF-16或UTF-32编码的Unicode文件开头的特殊标记，用来识别Unicode文件的编码类型和字节顺序（big-endian或little-endian）。对于UTF-8来说，BOM并不是必须的：UTF-8以单个字节为编码单元，不存在字节顺序问题，BOM在这里只起到表明“该文件是UTF-8编码”的作用。

BOMs 文件头:
   00 00 FE FF    = UTF-32, big-endian
   FF FE 00 00    = UTF-32, little-endian
   EF BB BF       = UTF-8
   FE FF          = UTF-16, big-endian
   FF FE          = UTF-16, little-endian

‍

下面举个例子，针对UTF-8的文件BOM做个处理：

String xmla = StringFileToolkit.file2String(new File("D:\\projects\\mailpost\\src\\a.xml"), "UTF-8");

byte[] b = xmla.getBytes("UTF-8");

String xml = new String(b, 3, b.length - 3, "UTF-8");

..............

思路是：先按照UTF-8编码读取文件，再把得到的字符串重新编码为UTF-8字节数组，跳过开头的三个字节（即BOM：EF BB BF），用剩余的字节重新构建一个新的字符串，然后用Dom4j解析处理，这样就不会报错了。注意：这种做法假定文件一定带有BOM，否则会误删正文开头的三个字节。

其他编码的方式处理思路类似，其实可以写一个通用的自动识别的BOM的工具，去掉BOM信息，返回字符串。

不过这个处理过程已经有牛人解决过了：http://koti.mbnet.fi/akini/java/unicodereader/

‍Example code using UnicodeReader class
Here is an example method to read text file. It will recognize bom marker and skip it while reading. 

// UnicodeReader source: http://koti.mbnet.fi/akini/java/unicodereader/UnicodeReader.java.txt
   /**
    * Reads the whole content of a text file into a character array.
    * <p>
    * Decoding is delegated to {@code UnicodeReader}, created with a
    * {@code null} default encoding. According to its author it recognizes a
    * leading BOM marker and skips it, falling back to the system default
    * encoding when no marker is found.
    *
    * @param file path of the file to read
    * @return every character of the file, in order
    * @throws IOException if the file cannot be opened or read
    */
   public static char[] loadFile(String file) throws IOException {
      // The BufferedReader owns the UnicodeReader, so closing it once is enough.
      BufferedReader reader =
            new BufferedReader(new UnicodeReader(new FileInputStream(file), null));
      try {
         CharArrayWriter writer = new CharArrayWriter();
         char[] buffer = new char[16 * 1024];   // 16k buffer
         int read;
         while ((read = reader.read(buffer)) != -1) {
            writer.write(buffer, 0, read);
         }
         return writer.toCharArray();
      } finally {
         try {
            reader.close();
         } catch (IOException ignored) {
            // Best-effort close: the data has either been read completely or
            // the primary exception is already propagating.
         }
      }
   }

Example code to write UTF-8 with bom marker
Write the BOM bytes at the start of an empty file so that text editors can detect the correct charset when they read it. Java's OutputStreamWriter does not write the UTF-8 BOM bytes by itself.


   /**
    * Writes text to a file as UTF-8, putting a UTF-8 BOM in front when the
    * file is empty at the time of writing.
    *
    * @param file   path of the file to write
    * @param data   text to write; {@code null} writes no text (the BOM is
    *               still written to an empty file)
    * @param append {@code true} to append to existing content, {@code false}
    *               to overwrite it
    * @throws IOException if the file cannot be opened, written, flushed or closed
    */
   public static void saveFile(String file, String data, boolean append) throws IOException {
      File f = new File(file);
      FileOutputStream fos = new FileOutputStream(f, append);
      try {
         // write UTF8 BOM mark if file is empty (always the case when not
         // appending, because opening the stream truncates the file)
         if (f.length() < 1) {
            final byte[] bom = new byte[] { (byte)0xEF, (byte)0xBB, (byte)0xBF };
            fos.write(bom);
         }

         BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"));
         if (data != null) {
            bw.write(data);
         }
         // Flush explicitly so that a failed write surfaces as an IOException
         // instead of being lost during cleanup.
         bw.flush();
      } finally {
         // Closing the underlying stream releases the file handle on every path.
         fos.close();
      }
   }

实际应用：

package com.dayo.gerber;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.util.Properties;

/**
 * Generates a QTP script from a template file.
 * <p>
 * The template is read as text, every <code>{#...}</code> placeholder is
 * replaced by a value taken from the gerber configuration or from the
 * big-board properties handed to the constructor, and the result is written
 * as UTF-8 preceded by a UTF-8 BOM.
 * <p>
 * NOTE(review): the gerber configuration and the dialog title are kept in
 * static fields that every constructor call reassigns, so all instances share
 * the most recently loaded configuration.
 *
 * @author Liu Fei (liufei)
 */
public class Generate4YYQTPScript {

	private static final String ENCODING = "UTF-8";
	private static final String GERBER_CONFIG = "config/gerber4yy.properties";

	/** Byte sequence that marks a file as UTF-8. */
	private static final byte[] UTF8_BOM = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };

	/** The dialog title is cut to at most this many characters. */
	private static final int DIALOG_TITLE_MAX_LENGTH = 17;

	private static Properties GERBER_CONFIG_PROPS = null;
	private static final String GERBER_FORMAT_DIALOG_TITLE_SCRIPT = "{#GERBER_FORMAT_DIALOG_TITLE}";
	private static String GERBER_FORMAT_DIALOG_TITLE = "";

	/* gerber properties parameter keys and the placeholders they fill */
	private static final String QTP_SCRIPT_IN = "script.in";

	private static final String QTP_SCRIPT_OUT = "script.out";

	private static final String QTP_SYSTEM_PATH = "QTP.system.path";
	private static final String QTP_SYSTEM_PATH_SCRIPT = "{#QTPSYSTEMPATH}";

	private static final String GERBER_FILE_DRIVER_PATH = "gerber.file.driver.path";
	private static final String GERBER_FILE_DRIVER_PATH_SCRIPT = "{#driver}";

	private static final String GERBER_FILE_DRIVER = "gerber.file.driver";
	private static final String GERBER_FILE_DRIVER_SCRIPT = "{#dr}";

	private static final String GERBER_FILE_DIR = "gerber.file.dir";
	private static final String GERBER_FILE_DIR_SCRIPT = "{#dirName}";

	private static final String GERBER_FILE = "gerber.file";
	private static final String GERBER_FILE_SCRIPT = "{#fileName}";

	private static final String GERBER_OUT = "gerber.out";
	private static final String GERBER_OUT_SCRIPT = "{#gerberout}";

	private static final String VB_EXE_PATH = "vb.exe.path";

	/* big-board placeholders; they double as keys of the big-board properties */
	private static final String LEAGUE_BOARD_NUM_SCRIPT = "{#LEAGUE_BOARD_NUM}";
	private static final String WIDTH_SCRIPT = "{#WIDTH}";
	private static final String P_SCRIPT = "{#P}";
	private static final String DY_SCRIPT = "{#DY}";

	private final Properties BIGBOARD_PROPS;

	/**
	 * Loads the gerber configuration and derives the dialog title from it.
	 * <p>
	 * The title is the upper-cased driver, directory and file name joined by
	 * backslashes, cut to at most 17 characters. A configuration that cannot
	 * be loaded is reported on standard error and otherwise ignored, which
	 * leaves the configuration unset.
	 *
	 * @param bigboard_props values for the big-board placeholders, keyed by
	 *                       the placeholder text; may be {@code null}
	 */
	public Generate4YYQTPScript(Properties bigboard_props) {
		super();
		BIGBOARD_PROPS = bigboard_props;

		try {
			GERBER_CONFIG_PROPS = ConfigHelper
					.getConfigProperties(GERBER_CONFIG);
			String title = upperCaseConfigValue(GERBER_FILE_DRIVER) + "\\"
					+ upperCaseConfigValue(GERBER_FILE_DIR) + "\\"
					+ upperCaseConfigValue(GERBER_FILE);
			// Guard the cut: a title shorter than the limit is kept whole
			// instead of raising StringIndexOutOfBoundsException.
			GERBER_FORMAT_DIALOG_TITLE = title.substring(0,
					Math.min(title.length(), DIALOG_TITLE_MAX_LENGTH));
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Demo entry point: generates the script with a fixed set of big-board
	 * values.
	 */
	public static void main(String[] args) throws IOException {
		Properties bigboard_props = new Properties();
		bigboard_props.setProperty(LEAGUE_BOARD_NUM_SCRIPT, String.valueOf(4));
		bigboard_props.setProperty(WIDTH_SCRIPT, String.valueOf(54.0));
		bigboard_props.setProperty(P_SCRIPT, String.valueOf(2.0));
		bigboard_props.setProperty(DY_SCRIPT, String.valueOf(0.0));

		Generate4YYQTPScript generateQTPScript = new Generate4YYQTPScript(bigboard_props);
		generateQTPScript.generateQTPScript();
		// NOTE(review): the original followed this with a disabled call that
		// ran the executable returned by getVBEXE() through a project helper.
	}

	/**
	 * @return the configured driver and directory joined by a forward slash
	 */
	public String getCheckOutFilePath() {
		return GERBER_CONFIG_PROPS.getProperty(GERBER_FILE_DRIVER).trim() + "/"
				+ GERBER_CONFIG_PROPS.getProperty(GERBER_FILE_DIR).trim();
	}

	/**
	 * @return the configured value of {@code gerber.out}
	 */
	public String getSavePath() {
		return GERBER_CONFIG_PROPS.getProperty(GERBER_OUT);
	}

	/**
	 * @return the configured value of {@code vb.exe.path}
	 */
	public String getVBEXE() {
		return GERBER_CONFIG_PROPS.getProperty(VB_EXE_PATH);
	}

	/**
	 * Generates the script using the template and target paths configured
	 * under {@code script.in} and {@code script.out}.
	 *
	 * @return the written target file
	 * @throws IOException if the template cannot be read or the target cannot
	 *                     be written
	 */
	public File generateQTPScript() throws IOException {
		return generateQTPScript(GERBER_CONFIG_PROPS
				.getProperty(QTP_SCRIPT_OUT), GERBER_CONFIG_PROPS
				.getProperty(QTP_SCRIPT_IN));
	}

	/**
	 * Generates the script from one file into another.
	 *
	 * @param target file to write
	 * @param source template file to read
	 * @return the written target file
	 * @throws IOException if the template cannot be read or the target cannot
	 *                     be written
	 */
	public File generateQTPScript(File target, File source) throws IOException {
		return generateQTPScript(target.getAbsolutePath(), source
				.getAbsolutePath());
	}

	/**
	 * Generates the script from one path into another.
	 * <p>
	 * The whole output is built in memory before the target is opened, so a
	 * failure while reading the template does not truncate an existing target.
	 * Missing parent directories of the target are created.
	 *
	 * @param target path of the file to write
	 * @param source path of the template file; an empty string stands for an
	 *               empty template
	 * @return the written target file
	 * @throws IOException if the template cannot be read or the target cannot
	 *                     be written
	 */
	public File generateQTPScript(String target, String source)
			throws IOException {
		String script = scriptConvey(getSourceFileContentReader(source));

		File f = new File(target);
		File parent = f.getParentFile();
		// A bare file name has no parent; only create directories that exist
		// in the path and are missing on disk.
		if (parent != null && !parent.exists()) {
			parent.mkdirs();
		}

		FileOutputStream fos = new FileOutputStream(f);
		try {
			// The BOM goes out as raw bytes before any encoded text.
			fos.write(UTF8_BOM);
			BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos,
					ENCODING));
			bw.write(script);
			bw.flush();
		} finally {
			// Releases the file handle on success and on failure alike.
			fos.close();
		}
		return f;
	}

	/**
	 * Fills every known placeholder of the template text.
	 * <p>
	 * Configuration placeholders are replaced first; the big-board
	 * placeholders follow when big-board properties were supplied. A
	 * placeholder whose value is missing is left untouched.
	 *
	 * @param source template text
	 * @return the text with placeholders replaced
	 */
	private String scriptConvey(String source) {
		String text = source;
		text = replace(text, GERBER_FORMAT_DIALOG_TITLE_SCRIPT,
				GERBER_FORMAT_DIALOG_TITLE);
		text = replace(text, GERBER_FILE_SCRIPT,
				GERBER_CONFIG_PROPS.getProperty(GERBER_FILE));
		text = replace(text, GERBER_FILE_DRIVER_SCRIPT,
				GERBER_CONFIG_PROPS.getProperty(GERBER_FILE_DRIVER));
		text = replace(text, GERBER_OUT_SCRIPT,
				GERBER_CONFIG_PROPS.getProperty(GERBER_OUT));
		text = replace(text, GERBER_FILE_DIR_SCRIPT,
				GERBER_CONFIG_PROPS.getProperty(GERBER_FILE_DIR));
		text = replace(text, GERBER_FILE_DRIVER_PATH_SCRIPT,
				GERBER_CONFIG_PROPS.getProperty(GERBER_FILE_DRIVER_PATH));
		text = replace(text, QTP_SYSTEM_PATH_SCRIPT,
				GERBER_CONFIG_PROPS.getProperty(QTP_SYSTEM_PATH));

		if (this.BIGBOARD_PROPS != null) {
			text = replace(text, DY_SCRIPT,
					this.BIGBOARD_PROPS.getProperty(DY_SCRIPT));
			text = replace(text, WIDTH_SCRIPT,
					this.BIGBOARD_PROPS.getProperty(WIDTH_SCRIPT));
			text = replace(text, LEAGUE_BOARD_NUM_SCRIPT,
					this.BIGBOARD_PROPS.getProperty(LEAGUE_BOARD_NUM_SCRIPT));
			text = replace(text, P_SCRIPT,
					this.BIGBOARD_PROPS.getProperty(P_SCRIPT));
		}

		return text;
	}

	/**
	 * Reads a configuration value, trimmed and upper-cased.
	 *
	 * @param key configuration key
	 * @return the normalized value
	 */
	private static String upperCaseConfigValue(String key) {
		return GERBER_CONFIG_PROPS.getProperty(key).trim().toUpperCase();
	}

	/**
	 * Drains a reader into a string and closes it.
	 * <p>
	 * The text is read line by line and every line is terminated with a
	 * single line feed, whatever terminator the input used.
	 *
	 * @param source reader to drain
	 * @return the text that was read
	 * @throws IOException if reading fails
	 */
	private String reader2String(Reader source) throws IOException {
		BufferedReader bufferedReader = new BufferedReader(source);
		try {
			StringBuilder result = new StringBuilder();
			String line;
			while ((line = bufferedReader.readLine()) != null) {
				result.append(line).append('\n');
			}
			return result.toString();
		} finally {
			bufferedReader.close();
		}
	}

	/**
	 * Opens a character reader on a file.
	 * <p>
	 * NOTE(review): bytes are decoded with the platform default charset, not
	 * with {@code ENCODING}; the encoding of the template files is not
	 * visible here, so this is left as it was - confirm.
	 *
	 * @param source file path
	 * @return a reader on the file, or {@code null} for an empty path
	 * @throws IOException if the file cannot be opened
	 */
	private Reader getReader(String source) throws IOException {
		return "".equals(source) ? null : new InputStreamReader(
				getInputStream(source));
	}

	/**
	 * Reads the template text from a file.
	 * <p>
	 * A leading byte-order mark character (U+FEFF) that survived decoding is
	 * dropped, because the generated file gets its own BOM.
	 *
	 * @param source file path
	 * @return the template text, or an empty string for an empty path
	 * @throws IOException if the file cannot be read
	 */
	private String getSourceFileContentReader(String source) throws IOException {
		if ("".equals(source)) {
			return "";
		}
		String content = reader2String(getReader(source));
		boolean startsWithBom = content.length() > 0
				&& content.charAt(0) == '\uFEFF';
		return startsWithBom ? content.substring(1) : content;
	}

	/**
	 * Opens a byte stream on a file.
	 *
	 * @param source file path
	 * @return a stream on the file, or {@code null} for an empty path
	 * @throws IOException if the file cannot be opened
	 */
	private InputStream getInputStream(String source) throws IOException {
		return "".equals(source) ? null : new FileInputStream(new File(source));
	}

	/**
	 * Replaces every occurrence of a substring with another string, treating
	 * both as literal text.
	 *
	 * @param inString   string to examine
	 * @param oldPattern substring to replace
	 * @param newPattern string to insert
	 * @return the string with the replacements applied; {@code inString}
	 *         itself when it or {@code oldPattern} is {@code null} or empty,
	 *         or when {@code newPattern} is {@code null}
	 */
	private String replace(String inString, String oldPattern, String newPattern) {
		if (!hasLength(inString) || !hasLength(oldPattern)
				|| newPattern == null) {
			return inString;
		}
		return inString.replace(oldPattern, newPattern);
	}

	/**
	 * @return {@code true} when the string is neither {@code null} nor empty
	 */
	private boolean hasLength(String str) {
		return hasLength((CharSequence) str);
	}

	/**
	 * @return {@code true} when the sequence is neither {@code null} nor empty
	 */
	private boolean hasLength(CharSequence str) {
		return (str != null && str.length() > 0);
	}
}

分享到：

Ibatis读写CLOB数据 | JAVA注册为WINDOW服务的方法

2011-08-01 11:28
浏览 3069
评论(0)
分类:Web前端
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论