`
DavyJones2010
  • 浏览: 154869 次
  • 性别: Icon_minigender_1
  • 来自: 杭州
社区版块
存档分类
最新评论

Java POI: Convert Excel to CSV by XSSFReader

阅读更多

1. 

  public String convertExcelToCsv(File uploadedFile, String reportType)
            throws InvalidFormatException, IOException, InterruptedException
    {
        logger.info(String
                .format("Start convertExcelToCsv, uploadedFile.name: [%s], reportTYpe: [%s]",
                        uploadedFile.getName(), reportType));

        String csvFileName = String.format("%s%s%s_%s.csv", tempCsvDir,
                File.separator, reportType, Calendar.getInstance()
                        .getTimeInMillis());

        BlockingQueue<List<String>> rowDataQueue = new ArrayBlockingQueue<List<String>>(
                10000);
        ExecutorService executorService = Executors.newFixedThreadPool(2);
        executorService.submit(new CustomizedXSSFReader(uploadedFile,
                rowDataQueue));
        executorService.submit(new CustomizedXSSFWriter(csvFileName,
                rowDataQueue));
        executorService.shutdown();

        while (!executorService.isTerminated())
        {
            logger.info("Main thread waits 1 more second for convertExcelToCsv");
            executorService.awaitTermination(1, TimeUnit.SECONDS);
        }

        logger.info(String.format(
                "Finished convertExcelToCsv, csvFileName: [%s]", csvFileName));
        return csvFileName;
    }

 

2. 

public class CustomizedXSSFReader implements Callable<Object>
{
    private static final Logger logger = Logger
            .getLogger(CustomizedXSSFReader.class);

    private File excelFile;
    private List<SheetDefination> targetSheetDefList;
    private BlockingQueue<List<String>> rowDataQueue;

    public CustomizedXSSFReader(File excelFile,
            BlockingQueue<List<String>> rowDataQueue)
    {
        super();
        this.excelFile = excelFile;
        this.rowDataQueue = rowDataQueue;
    }

    @Override
    public Object call() throws Exception
    {
        OPCPackage opcPackage = OPCPackage.open(excelFile);
        XSSFReader xssfReader = new XSSFReader(opcPackage);

        readWorkbookDef(xssfReader);
        readSheet(xssfReader);

        return null;
    }

    private void readWorkbookDef(XSSFReader xssfReader) throws IOException,
            InvalidFormatException, SAXException
    {
        InputStream workbookDefInputStream = xssfReader.getWorkbookData();
        XMLReader workbookDefXMLReader = XMLReaderFactory.createXMLReader();
        workbookDefXMLReader
                .setContentHandler(new CustomizedWorkbookDefHandler());
        workbookDefXMLReader.parse(new InputSource(workbookDefInputStream));
    }

    public void readSheet(XSSFReader xssfReader) throws Exception
    {
        logger.info(String.format(
                "Start readSheet, excelFileName: [%s], targetSheetRID: [%s]",
                excelFile.getName(), targetSheetDefList.get(0).getSheetRID()));

        SharedStringsTable sharedStringsTable = xssfReader
                .getSharedStringsTable();

        InputStream inputStream = xssfReader.getSheet(targetSheetDefList.get(0)
                .getSheetRID());
        XMLReader xmlReader = XMLReaderFactory.createXMLReader();
        xmlReader.setContentHandler(new CustomizedContentHandler(
                sharedStringsTable));

        InputSource inputSource = new InputSource(inputStream);
        xmlReader.parse(inputSource);
        inputStream.close();

        logger.info(String.format("Finished readSheet, excelFileName: [%s]",
                excelFile.getName()));
    }

    private static class SheetDefination
    {
        private String sheetName;
        private String sheetId;
        private String sheetRID;

        public SheetDefination(String sheetName, String sheetId, String sheetRID)
        {
            super();
            this.sheetName = sheetName;
            this.sheetId = sheetId;
            this.sheetRID = sheetRID;
        }

        @SuppressWarnings("unused")
        public String getSheetName()
        {
            return sheetName;
        }

        @SuppressWarnings("unused")
        public String getSheetId()
        {
            return sheetId;
        }

        public String getSheetRID()
        {
            return sheetRID;
        }

        @Override
        public String toString()
        {
            return "SheetDefination [sheetName=" + sheetName + ", sheetId="
                    + sheetId + ", sheetRID=" + sheetRID + "]";
        }

    }

    private class CustomizedWorkbookDefHandler extends DefaultHandler
    {
        private boolean isWorkbookStarted;
        private boolean isSheetsStarted;
        private boolean isSheetStarted;

        @Override
        public void startElement(String uri, String localName, String name,
                Attributes attributes) throws SAXException
        {
            if ("workbook".equals(name))
            {
                isWorkbookStarted = true;
            }
            else if ("sheets".equals(name))
            {
                isSheetsStarted = true;
                targetSheetDefList = new ArrayList<SheetDefination>();
            }
            else if ("sheet".equals(name))
            {
                isSheetStarted = true;
            }

            if (isWorkbookStarted && isSheetsStarted && isSheetStarted)
            {
                String sheetName = attributes.getValue("name");
                String sheetId = attributes.getValue("sheetId");
                String sheetRID = attributes.getValue("r:id");
                SheetDefination sheetDefination = new SheetDefination(
                        sheetName, sheetId, sheetRID);
                logger.info(String.format(
                        "Added sheetDefination: [%s] into targetSheetDefList.",
                        sheetDefination));
                targetSheetDefList.add(sheetDefination);
            }
        }

        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException
        {
            if ("workbook".equals(name))
            {
                logger.info(String
                        .format("Finished resolve workbookDef, targetSheetDefList: [%s]",
                                targetSheetDefList));
                isWorkbookStarted = false;
            }
            else if ("sheets".equals(name))
            {
                logger.info(String.format(
                        "Finished resolve sheetsDef, targetSheetDefList: [%s]",
                        targetSheetDefList));
                isSheetsStarted = false;
            }
            else if ("sheet".equals(name))
            {
                isSheetStarted = false;
            }
        }

    }

    private class CustomizedContentHandler extends DefaultHandler
    {
        private boolean isSheetDataStarted;
        private boolean isRowStarted;
        private boolean isColumnStarted;
        private boolean isValueStarted;
        private boolean valueShouldGetFromSharedStringTable;

        private SharedStringsTable sharedStringsTable;
        private StringBuilder cellValue;

        private int totalRowNum;
        private int totalColumnNum;

        private int currColumnNumCursor = 0;
        private int prevColumnNumCursor = -1;

        private int currRowNumCursor = 0;
        private int prevRowNumCursor = -1;

        private List<String> rowData;

        public CustomizedContentHandler(SharedStringsTable sharedStringsTable)
        {
            super();
            this.sharedStringsTable = sharedStringsTable;
        }

        private void initSheetInfo(String cellReferenceRegion)
        {
            logger.info(String.format(
                    "Start initSheetInfo, cellReferenceRegion: [%s]",
                    cellReferenceRegion));

            String[] cellReferences = StringUtils.split(cellReferenceRegion,
                    ':');
            if (2 != cellReferences.length)
            {
                totalColumnNum = 10;
                totalRowNum = 10;

                logger.warn(String
                        .format("cellReferenceRegion: [%s] is not reliable, thus we use default totalColumnNum: [%d], totalRowNum: [%d]",
                                cellReferenceRegion, totalColumnNum,
                                totalRowNum));
            }
            else
            {
                String startCellReference = cellReferences[0];
                String endCellReference = cellReferences[1];

                int startX = CellReferenceUtil
                        .getColIndexByCoordName(startCellReference);
                int startY = CellReferenceUtil
                        .getRowIndexByCoordName(startCellReference);

                int endX = CellReferenceUtil
                        .getColIndexByCoordName(endCellReference);
                int endY = CellReferenceUtil
                        .getRowIndexByCoordName(endCellReference);
                totalRowNum = endY - startY + 1;
                totalColumnNum = endX - startX + 1;

                logger.info(String
                        .format("Finished initSheetInfo, totalRowNum: [%d], totalColumnNum: [%d]",
                                totalRowNum, totalColumnNum));
            }
        }

        @Override
        public void startElement(String uri, String localName, String name,
                Attributes attributes) throws SAXException
        {
            if ("dimension".equals(name))
            {
                String cellReferenceRegion = attributes.getValue("ref");
                initSheetInfo(cellReferenceRegion);
            }
            else if ("sheetData".equals(name))
            {
                isSheetDataStarted = true;
            }
            else if ("row".equals(name))
            {
                isRowStarted = true;
                currRowNumCursor = Integer.parseInt(attributes.getValue("r")) - 1;
                rowData = new ArrayList<String>(totalColumnNum);
            }
            else if ("c".equals(name))
            {
                isColumnStarted = true;

                String cellIndex = attributes.getValue("r");
                currColumnNumCursor = CellReferenceUtil
                        .getColIndexByCoordName(cellIndex);

                String cellType = attributes.getValue("t");
                if ("s".equals(cellType))
                {
                    valueShouldGetFromSharedStringTable = true;
                }
                else
                {
                    valueShouldGetFromSharedStringTable = false;
                }
            }
            else if ("v".equals(name))
            {
                isValueStarted = true;
                cellValue = new StringBuilder();
            }
        }

        private void fillEmptyCellValuesIfNecessary()
        {
            if ((currColumnNumCursor - prevColumnNumCursor) > 1)
            {
                for (int i = 1; i < currColumnNumCursor - prevColumnNumCursor; i++)
                {
                    rowData.add("");
                }
            }
        }

        private void fillEmptyRowValuesIfNecessary()
        {
            if ((currRowNumCursor - prevRowNumCursor) > 1)
            {
                for (int i = 1; i < currRowNumCursor - prevRowNumCursor; i++)
                {
                    rowDataQueue.add(new ArrayList<String>(totalColumnNum));
                }
            }
        }

        @Override
        public void endElement(String uri, String localName, String name)
                throws SAXException
        {
            if ("sheetData".equals(name))
            {
                isSheetDataStarted = false;
                rowDataQueue.offer(new ArrayList<String>());
            }
            else if ("row".equals(name))
            {
                isRowStarted = false;

                fillEmptyRowValuesIfNecessary();
                rowDataQueue.offer(rowData);
                prevRowNumCursor = currRowNumCursor;
            }
            else if ("c".equals(name))
            {
                isColumnStarted = false;
            }
            else if ("v".equals(name))
            {
                if (isSheetDataStarted && isRowStarted && isColumnStarted
                        && isValueStarted)
                {
                    fillEmptyCellValuesIfNecessary();

                    if (valueShouldGetFromSharedStringTable)
                    {
                        valueShouldGetFromSharedStringTable = false;

                        int index = Integer.parseInt(cellValue.toString());
                        String cellValueStr = new XSSFRichTextString(
                                sharedStringsTable.getEntryAt(index))
                                .toString();
                        rowData.add(cellValueStr);
                    }
                    else
                    {
                        rowData.add(cellValue.toString());
                    }
                }
                else
                {
                    logger.error(String
                            .format("Error Excel->XML, isSheetDataStarted, all of isRowStarted, isColumnStarted, isValueStarted should  be true "));
                }
                prevColumnNumCursor = currColumnNumCursor;
                isValueStarted = false;
                cellValue = new StringBuilder();
            }
        }

        @Override
        public void characters(char[] ch, int start, int length)
                throws SAXException
        {
            if (isSheetDataStarted && isRowStarted && isColumnStarted
                    && isValueStarted)
            {
                cellValue.append(new String(ch, start, length));
            }
        }
    }

}

 

3. 

public class CustomizedXSSFWriter implements Callable<Object>
{
    private static final Logger logger = Logger
            .getLogger(CustomizedXSSFWriter.class);

    private String csvFileName;
    private BlockingQueue<List<String>> rowDataQueue;

    private int currentLineCount = 0;

    public CustomizedXSSFWriter(String csvFileName,
            BlockingQueue<List<String>> rowDataQueue)
    {
        super();
        this.csvFileName = csvFileName;
        this.rowDataQueue = rowDataQueue;
    }

    @Override
    public Object call() throws Exception
    {
        CSVWriter writer = new CSVWriter(new FileWriter(csvFileName));

        while (true)
        {
            List<String> rowData = rowDataQueue.take();

            if (rowData.isEmpty())
            {
                break;
            }
            writer.writeNext(rowData.toArray(new String[rowData.size()]));
            currentLineCount++;
        }

        writer.close();

        logger.info(String.format(
                "Finished write fileName: [%s]. totalLineCount: [%d]",
                csvFileName, currentLineCount));
        return null;
    }

}

 

4. 

    @Test
    public void convertExcelToCsvTest() throws Exception
    {
        File uploadedFile = new File(
                "src/test/resources/ccar-icg-14q/CCAR_ICG_14Q_MANUAL_ADJUSTMENT_SECURITIZED_PRODUCT_3791_RAPTOR-C-20140812_190656657_20140331.xlsx");
        String reportType = "RCAST_CCAR_ICG_14Q_ADJUSTMENT";
        ccarIcg14QFileUploadService.convertExcelToCsv(uploadedFile, reportType);
    }

 

 

 

Reference Links:

1) https://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xssf/eventusermodel/examples/FromHowTo.java

 

分享到:
评论

相关推荐

    java实现excel转csv

    在压缩包文件"excelToCsv"中,通常会包含一个示例Java程序,该程序实现了上述步骤,你可以参考它来理解和实践这个过程。通过这个程序,你可以将任意的Excel文件转换为CSV格式,方便后续的数据处理和分析。

    java_poi实现excel导入导出

    Java POI 实现 Excel 导入导出 Java POI 是一个流行的 Java 库,用于处理 Microsoft Office 文件格式,包括 Excel 文件。在本文中,我们将详细介绍如何使用 Java POI 实现 Excel 导入导出功能。 1. 什么是 Java ...

    java操作csv文件(读、写)

    在做项目的时候,发现使用POI无法解析以csv文件结尾的文件,虽然csv文件能用Excel打开,但是csv文件没有像Excel一样有规定的电子表格形式,故使用POI无法解析csv文件,在网上找了一下,发现java有提供java csv文件来...

    Java poi导入导出EXCEL工具类(兼容各版本)

    概述:Java poi导入导出EXCEL工具类(兼容各版本) 一、功能说明 允许同时导入或导出多个sheet,同一sheet可同时存在多个数据块,按数据块划分处理数据。 二、配置文件示例及详细说明 1、导入xml配置示例如下(见...

    java的poi生成excel图表demo

    Java的Apache POI库是一个强大的工具,用于读取、创建和修改Microsoft Office格式的文件,尤其是Excel(.xlsx)文档。在本示例中,我们将深入探讨如何利用POI库来生成Excel中的图表曲线,这对于数据可视化和报告生成...

    Java POI 导入导出Excel简单实例源代码

    Java POI 导入导出Excel简单实例源代码 该源代码的jar包,参见以下博文附录截图 Java POI导出EXCEL经典实现 Java导出Excel弹出下载框 http://blog.csdn.net/evangel_z/article/details/7332535 web页面导出Excel...

    java poi导出图片到excel示例代码

    Java POI导出图片到Excel示例代码详解 Java POI是Java开发中常用的开源库,用于读写Microsoft Office文件格式,包括Excel、Word、PowerPoint等。今天,我们将介绍如何使用Java POI将图片导出到Excel中。 标题解释 ...

    Java POI 生成Excel时显示货币格式

    在Java开发中,Apache POI库是处理Microsoft Office文件(如Word、Excel)的强大工具。本文将深入探讨如何使用Java POI生成带有货币格式的Excel文件,这在财务报告和其他需要精确货币显示的应用场景中尤为关键。 ##...

    java poi导出excel

    Java POI库是Apache软件基金会开发的一个开源项目,专门用于读写Microsoft Office格式的文件,包括Excel。在Java中,如果你需要导出Excel文件,Java POI是一个非常实用的工具。下面将详细介绍如何使用Java POI来实现...

    Java实现Excel读写的poi 5.2.1版本jar

    Java中的Apache POI库是一个强大的工具,用于读取和写入Microsoft Office格式的文件,特别是Excel的XLS和XLSX格式。在这个5.2.1版本中,POI提供了对Excel电子表格的强大支持,包括读取数据、修改内容、创建新工作簿...

    最新POI Lib Java Excel转换成Txt

    Apache POI是一个开源的Java读写Excel、WORD等微软OLE2组件文档的项目。目前POI已经有了Ruby版本。 结构: •HSSF - 提供读写Microsoft Excel XLS格式档案的功能。 •XSSF - 提供读写Microsoft Excel OOXML XLSX...

    JAVA POI Excel转Html

    JAVA POI Excel转Html,代码和所需的jar都在压缩包,项目在线预览需求,实现后分享下

    java使用POI导出 Excel工具类

    java使用POI导出 Excel+图片工具类 ,里面含有poi jar包,只调用接口即可直接保存Excel。使用的时候需先把数据封装,具体包装需根据实际导出数据进行处理。文件demo中只提供包装格式。

    JAVA POI 4.0 对Excel读写整理LIB以及样例工程

    JAVA POI 4.0 是一个强大的库,用于在Java应用程序中处理Microsoft Office格式的文件,特别是Excel。这个库允许开发者以编程方式创建、修改和读取Excel文档,极大地拓展了Java在电子表格处理上的能力。POI 4.0是该库...

    Java POI根据模板生成Excel(xlsx)文件

    Java POI库是一个广泛使用的开源项目,专门用于处理Microsoft Office格式的文件,包括Excel的.xls和.xlsx文件。在本示例中,我们关注的是如何利用Java POI来根据模板生成.xlsx格式的Excel文件。这个过程涉及到对...

    java poi 操作Excel

    Java POI 是一个开源项目,专门用于处理Microsoft Office格式的文件,包括Excel、Word和PowerPoint等。在Java世界中,如果你想对Excel进行读写操作,POI库是不可或缺的工具。下面将详细介绍如何使用Java POI来操作...

    JAVA poi 做EXCEL导出(包含图片) 完整版

    在Java编程环境中,Apache POI库是一个非常实用的工具,用于读取、写入和修改Microsoft Office格式的文件,特别是Excel(.xlsx 和 .xls)文件。本教程将详细介绍如何使用JAVA POI库来创建一个包含图片的完整Excel...

    Java Poi 操作excel的API 好用

    Java POI 是一个开源项目,专门用于处理Microsoft Office格式的文件,尤其是Excel。它提供了丰富的API,使得在Java环境中操作Excel文件变得极其便捷。本文将深入探讨Java POI的使用,包括其核心功能、API使用示例...

    Java通过POI读取Excel遍历数据,批量生成word文档

    Apache POI是一个开源项目,它允许Java开发者处理Microsoft Office格式的文件,如Excel(.xlsx或.xls)和Word(.docx)。 首先,我们需要导入Apache POI的相关依赖。在Maven项目中,可以在pom.xml文件中添加以下...

    java POI完整示例,POI将word转HTML,数据库倒出数据到Excel等

    Java POI是一个强大的库,主要用于处理Microsoft Office格式的文件,如Word、Excel和PowerPoint。在本示例中,我们将探讨如何使用POI进行一系列操作,包括将Word文档转换为HTML,以及从数据库导出数据到Excel。 1. ...

Global site tag (gtag.js) - Google Analytics