package com.test; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.util.LinkedList; import java.util.List; import org.apache.commons.lang.StringUtils; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackageAccess; import org.apache.poi.ss.usermodel.BuiltinFormats; import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFCellStyle; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; public class Test { public static void main(String[] args) { long a=System.currentTimeMillis(); ExecThread thread = new ExecThread(); thread.start(); try { thread.join(); } catch (InterruptedException e) { e.printStackTrace(); } System.out.println("Main ... "+Thread.currentThread().getName()); System.out.println("执行耗时 :"+(System.currentTimeMillis()-a)/1000f+" 秒 "); } public static class ExecThread extends Thread{ public ExecThread(){} /* (non-Javadoc) * @see java.lang.Thread#run() */ @Override public void run() { try { FileWriter fileWriter = new FileWriter("D:/test-out.txt"); String excelPath = "D:/test.xlsx"; new ExcelHandler(fileWriter,excelPath,"1","").start(); System.out.println("ExecThread ... "+Thread.currentThread().getName()); } catch (IOException e) { e.printStackTrace(); } } } public static class ExcelHandler extends DefaultHandler{ private final String excelPath; private final String sheetIndex; private final String nullString; /** * 从<c />标签得到的单元格格式,获取当前单元格数值需要用到 */ private enum XSSFDataType { BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER, } private XSSFReader xssfReader; private ReadOnlySharedStringsTable sst; private StylesTable stylesTable; private boolean vIsOpen = false; private StringBuffer value = new StringBuffer(); protected XSSFDataType nextDataType; // 当前遍历的Excel单元格列索引 protected int thisColumnIndex = -1; // The last column printed to the output stream protected int lastColumnIndex = -1; protected int maxColumnCount = -1; protected int formatIndex; protected String formatString; private final DataFormatter formatter = new DataFormatter(); private List<String> rowDatas = null; private String targetFile; private FileWriter writer; public ExcelHandler(String targetFile, String excelPath, String sheetIndex, String nullString){ // this.writer = writer; this.targetFile = targetFile; this.excelPath = excelPath; this.sheetIndex = sheetIndex; this.nullString = nullString; } public ExcelHandler(FileWriter writer, String excelPath, String sheetIndex, String nullString){ this.writer = writer; this.excelPath = excelPath; this.sheetIndex = sheetIndex; this.nullString = nullString; } /** * 初始化 */ public void start(){ try { OPCPackage op = OPCPackage.open(this.excelPath,PackageAccess.READ); this.xssfReader = new XSSFReader(op); this.sst = new ReadOnlySharedStringsTable(op); this.stylesTable = xssfReader.getStylesTable(); // 开始解析 parseXmlContent(new InputSource(getOneSheetStream(sheetIndex))); } catch (Exception e) { e.printStackTrace(); } } /* (non-Javadoc) * @see org.xml.sax.helpers.DefaultHandler#startDocument() */ @Override public void startDocument() throws SAXException { try { if(null == writer && StringUtils.isNotBlank(targetFile)) { writer = new FileWriter(this.targetFile); } } catch (IOException e) { e.printStackTrace(); } System.out.println("start Excel document ... "+Thread.currentThread().getName()); } /* (non-Javadoc) * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes) */ @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("inlineStr".equals(qName) || "v".equals(qName)) { vIsOpen = true; // Clear contents cache value.setLength(0); } // c => cell else if ("c".equals(qName)) { // Get the cell reference String r = attributes.getValue("r"); int firstDigit = -1; for (int c = 0; c < r.length(); ++c) { if (Character.isDigit(r.charAt(c))) { firstDigit = c; break; } } // 当前列索引 thisColumnIndex = nameToColumn(r.substring(0, firstDigit)); // Set up defaults. this.nextDataType = XSSFDataType.NUMBER; this.formatIndex = -1; this.formatString = null; String cellType = attributes.getValue("t"); String cellStyleStr = attributes.getValue("s"); if ("b".equals(cellType)) nextDataType = XSSFDataType.BOOL; else if ("e".equals(cellType)) nextDataType = XSSFDataType.ERROR; else if ("inlineStr".equals(cellType)) nextDataType = XSSFDataType.INLINESTR; else if ("s".equals(cellType)) nextDataType = XSSFDataType.SSTINDEX; else if ("str".equals(cellType)) nextDataType = XSSFDataType.FORMULA; else if (cellStyleStr != null) { // It's a number, but almost certainly one // with a special style or format int styleIndex = Integer.parseInt(cellStyleStr); XSSFCellStyle style = this.stylesTable.getStyleAt(styleIndex); this.formatIndex = style.getDataFormat(); this.formatString = style.getDataFormatString(); if (this.formatString == null) this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex); } } // row => 行开始 else if ("row".equals(qName)) { // System.out.println("------------ row ---------------"); // line = resultWriter.createLine(); rowDatas = new LinkedList<String>(); } } /* (non-Javadoc) * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String) */ @Override public void endElement(String uri, String localName, String qName) throws SAXException { Object thisObj = null; if("v".equals(qName)) { // 当前单元格数据 thisObj = extractCellValue(value,nextDataType); // System.out.println("current cell value is : "+thisObj); // Output after we've seen the string contents // Emit commas for any fields that were missing on this row if (lastColumnIndex == -1) { lastColumnIndex = 0; } try { for (int i = lastColumnIndex; i < thisColumnIndex; ++i) { // rowDatas.append(this.fieldSplit); // line.addField(nullString); rowDatas.add(nullString); writer.write("|"); } // Might be the empty string. // line.addField(thisObj==null?nullString:String.valueOf(thisObj)); rowDatas.add(thisObj==null?nullString:String.valueOf(thisObj)); writer.write(thisObj==null?nullString:String.valueOf(thisObj)); }catch (Exception e) { e.printStackTrace(); } // Update column if (thisColumnIndex > -1) lastColumnIndex = thisColumnIndex; }else if("row".equals(qName)) { try { // Print out any missing commas if needed if (maxColumnCount > 0) { // Columns are 0 based if (lastColumnIndex == -1) { lastColumnIndex = 0; } for (int i = lastColumnIndex; i < (this.maxColumnCount)-1; i++) { // rowDatas.append(this.fieldSplit); // line.addField(nullString); rowDatas.add(nullString); writer.write("|"); } } // We're onto a new row // resultWriter.sendToWriter(line); ExcelReader.datas.add(rowDatas); writer.write("\r\n"); }catch (Exception e){ e.printStackTrace(); } lastColumnIndex = -1; } } /* (non-Javadoc) * @see net.bingosoft.ExcelTemplate.imports.handler.DefaultHandler#characters(char[], int, int) */ @Override public void characters(char[] ch, int start, int length) throws SAXException { if(vIsOpen){ value.append(ch,start,length); } } /* (non-Javadoc) * @see org.xml.sax.helpers.DefaultHandler#endDocument() */ @Override public void endDocument() throws SAXException { // resultWriter.flush(); ExcelReader.isReturn = true; System.out.println("End Excel Document .. "+Thread.currentThread().getName()); try { this.writer.flush(); this.writer.close(); } catch (IOException e) { e.printStackTrace(); } } /** * <p>获取指定sheet的数据流</p> * @param sheetId * @return */ private InputStream getOneSheetStream(String sheetId){ InputStream in = null; try { in = xssfReader.getSheet("rId"+sheetId); } catch (Exception e) { e.printStackTrace(); } return in; } /** * <p>以xml的格式解析Excel数据</p> * @param sheetSource * @param tables * @param table * @throws Exception */ private void parseXmlContent(InputSource sheetSource) throws Exception { XMLReader xmlReader = null; try { System.out.println("create XML reader."); xmlReader = XMLReaderFactory.createXMLReader("org.apache.xerces.parsers.SAXParser"); } catch (Exception e) { e.printStackTrace(); } xmlReader.setContentHandler(this); xmlReader.parse(sheetSource); } /** * 从列名转换为列索引 * @param name * @return */ private int nameToColumn(String name) { int column = -1; for (int i = 0; i < name.length(); ++i) { int c = name.charAt(i); column = (column + 1) * 26 + c - 'A'; } return column; } /** * 抽取单元格数据 * @param value * @param nextDataType * @return */ private Object extractCellValue(StringBuffer value,XSSFDataType nextDataType){ Object obj = ""; switch(nextDataType) { case BOOL : char first = value.charAt(0); obj = first=='0'?false:true; break; case ERROR : obj = value.toString().trim(); break; case FORMULA : obj = value.toString().trim(); break; case INLINESTR : XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()); obj = rtsi.toString().trim(); break; case SSTINDEX : String sstIndex = value.toString().trim(); try { int idx = Integer.parseInt(sstIndex); XSSFRichTextString rtss = new XSSFRichTextString(sst.getEntryAt(idx)); obj = rtss.toString().trim(); }catch (NumberFormatException ex) { ex.printStackTrace(); } break; case NUMBER : String n = value.toString().trim(); if (this.formatString != null) { obj = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); }else{ obj = n; } break; default: obj = ""; break; } return obj; } } }
相关推荐
读取Excel文件使用HSSF的usermodel API非常直观。首先,你需要创建一个`InputStream`,然后实例化`HSSFWorkbook`: ```java InputStream myxls = new FileInputStream("workbook.xls"); HSSFWorkbook wb = new ...
总的来说,Apache POI 的 HSSF 组件为 Java 开发者提供了一种强大且灵活的方式来处理 Excel 文件,无论是读取还是写入。通过熟练掌握 usermodel 和 eventusermodel API,开发者可以实现各种复杂的 Excel 处理需求,...
通过以上介绍,我们可以看出,Apache POI提供了丰富的API来支持Excel文档的创建、读取和修改,使得Java开发者可以灵活地处理Excel数据,实现各种复杂的Excel操作。在实际项目中,根据需求选择合适的API和模型,可以...
例如,以下代码展示了如何使用usermodel读取Excel文件: ```java InputStream myxls = new FileInputStream("workbook.xls"); HSSFWorkbook wb = new HSSFWorkbook(myxls); HSSFSheet sheet = wb.getSheetAt(0); ...
在HSSF中,有两种主要的API用于读取Excel文件:Usermodel和EventUsermodel。Usermodel API提供了直观的编程模型,将Excel文件映射为一系列对象,如Workbook、Sheet、Row和Cell,所有数据都保留在内存中。例如,读取...
3. **POI Excel文档结构类**:这些类是构建和操作Excel文档的基础: - `HSSFWorkbook`:表示一个Excel工作簿。 - `HSSFSheet`:代表Excel的一个工作表。 - `HSSFRow`:表示工作表中的行。 - `HSSFCell`:表示行...
这两个组件都位于 `org.apache.poi.hssf` 和 `org.apache.poi.xssf` 包下,分别提供了相应的用户模型(UserModel)API 和事件用户模型(EventUserModel)API。 用户模型 API 更直观易用,适合大部分常规操作,如...
【Java解析Excel的POI框架】是Apache Jakarta项目的一个组件,专门用于处理OLE2对象,尤其是Microsoft Excel文件。它的核心接口HSSF(Horrible Spreadsheet Format)提供了读取、写入和修改Excel文件的能力,使...
使用HSSF的usermodel读取Excel文件非常直观。首先,通过FileInputStream打开文件,然后创建HSSFWorkbook实例: ```java InputStream myxls = new FileInputStream("workbook.xls"); HSSFWorkbook wb = new ...
1. **Excel处理**:Apache POI中的HSSF(Horizontally-Scattered Stream Format)组件用于处理.xls格式的老版Excel文件。你可以创建工作簿,添加工作表,插入单元格,设置样式,公式,以及读取和修改已有数据。 2. ...
使用Apache POI和对应的OOXML Schemas,开发者可以创建Java程序来读取、修改或创建新的Office文档。例如,你可以构建一个应用来自动化生成报表,或者从大量的Excel表格中提取数据。通过调用POI API,你可以访问和...
Apache POI 是一个开源项目,专门用于处理Microsoft Office格式的文件,如Excel、Word和PowerPoint。...总之,Apache POI是Java开发者处理Excel文档的强大工具,无论是读取还是生成,都能提供高效的解决方案。
读取Excel文件则需要使用`POIFSFileSystem`来打开文件,然后通过`HSSFWorkbook`获取工作簿,进一步获取工作表、行和单元格。以下是一个基本的读取示例: ```java import org.apache.poi.hssf.usermodel.*; import ...
1. 使用UserModel读取Excel文件: ```java FileInputStream fileIn = new FileInputStream("workbook.xls"); HSSFWorkbook wb = new HSSFWorkbook(fileIn); HSSFSheet sheet1 = wb.getSheetAt(0); HSSFRow row = ...
Apache POI 是一个开源的 Java 库,它是 Apache 软件基金会的子项目,专门用于处理 Microsoft Office 的文件格式,特别是微软的 Excel(97-2002 版本及更高版本)。其核心功能是通过提供一系列 Java API 来创建、...
Java操作Office文档开发包主要指的是Apache POI项目,这是一个开源的Java库,专门用于读取、写入和修改Microsoft Office格式的文件,如Word(.doc/.docx)、Excel(.xls/.xlsx)和PowerPoint(.ppt/.pptx)。...
在提供的lib包中,可能包含了Apache POI库和其他依赖,这些依赖可能包含了处理大数据Excel所需的各种组件和优化工具。确保正确引入这些库,并结合上述优化策略,可以有效地处理大数据量的Excel导入任务,避免内存...
Apache POI 是一个广泛使用的Java库,它允许程序员在Java应用程序中读取、写入和修改Microsoft Office格式的文件,尤其是Excel文档。这个库对于在没有Microsoft Office环境的情况下处理Excel数据非常有用,尤其是在...