`

Iterate over rows and cells(EXCEL文件的读取)

阅读更多
package cn.com.songjy.test.excel;

import java.io.IOException;
import java.text.NumberFormat;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.CellReference;

public class ReadLine {

	private Log log = LogFactory.getLog(ReadLine.class);

	/**
	 * 
	 * @param excel_file_path
	 *            --excel文件的存储路径
	 * @throws IOException
	 * @throws InvalidFormatException
	 */
	public void read(String excel_file_path) throws IOException, InvalidFormatException {

		Workbook wb = WorkbookFactory.create(new java.io.File(excel_file_path));

		Sheet sheet = wb.getSheetAt(0);

		/* 定义最小读取的行数 */
		int rowStart = Math.min(0, sheet.getFirstRowNum());

		/* 定义最大读取的行数 */
		int rowEnd = Math.max(0, sheet.getLastRowNum());

		for (int rowNum = rowStart; rowNum <= rowEnd; rowNum++) {

			Row row = sheet.getRow(rowNum);

			/* 假如当前为空行,跳过当前行继续读取下一行数据 */
			if (row == null)
				continue;

			/* 定义当前行的最大读取列 */
			int lastColumn = Math.max(row.getLastCellNum(), 0);

			NumberFormat df = NumberFormat.getNumberInstance();
			// df.setMaximumFractionDigits(2);//保留小数点后两位
			// df.setMinimumFractionDigits(2);//如果不这么做,那么当value的值是100.00的时候返回100
			df.setGroupingUsed(false);// 去掉千分位,如果想输出的格式用逗号隔开,可以设置成true

			for (int cn = 0; cn < lastColumn; cn++) {
				Cell cell = row.getCell(cn, Row.RETURN_BLANK_AS_NULL);

				/* 假如当前为空格,跳过当前格继续读取下一格的数据 */
				if (null == cell)
					continue;

				CellReference cellRef = new CellReference(row.getRowNum(), cell.getColumnIndex());

				log.info(cellRef.formatAsString());
				log.info(" - ");

				switch (cell.getCellType()) {
				case Cell.CELL_TYPE_STRING:
					log.info(cell.getRichStringCellValue().getString());
					break;
				case Cell.CELL_TYPE_NUMERIC:
					if (DateUtil.isCellDateFormatted(cell)) {
						log.info(cell.getDateCellValue());
					} else {
						log.info(df.format(cell.getNumericCellValue()));
					}
					break;
				case Cell.CELL_TYPE_BOOLEAN:
					log.info(cell.getBooleanCellValue());
					break;
				case Cell.CELL_TYPE_FORMULA:
					log.info(cell.getCellFormula());
					break;
				default:
					log.info("----");
				}

			}
		}
	}

	public static void main(String[] args) {
		try {
			new ReadLine().read("E:\\a.xlsx");
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InvalidFormatException e) {
			e.printStackTrace();
		}
	}
}


该程序依赖jar包

poi-3.9.jar
poi-ooxml-3.9.jar
poi-ooxml-schemas-3.9.jar
commons-logging-1.1.3.jar
dom4j-1.6.jar
xmlbeans-2.3.0.jar

Java 正则表达式解析csv文件
POI读取大数据量的Excel文件
Java读取大数据量07Excel的方法(POI),如下:
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

public class ExcelUtil extends DefaultHandler {

	private SharedStringsTable sst;
	private String lastContents;
	private boolean nextIsString;

	private int sheetIndex = -1;
	private List<String> rowlist = new ArrayList<String>();
	private int curRow = 0;
	private int curCol = 0;

	/**
	 * 读取第一个工作簿的入口方法
	 * 
	 * @param path
	 */
	public void readOneSheet(String path) throws Exception {
		OPCPackage pkg = OPCPackage.open(path);
		XSSFReader r = new XSSFReader(pkg);
		SharedStringsTable sst = r.getSharedStringsTable();

		XMLReader parser = fetchSheetParser(sst);

		InputStream sheet = r.getSheet("rId1");

		InputSource sheetSource = new InputSource(sheet);
		parser.parse(sheetSource);

		sheet.close();
	}

	/**
	 * 读取所有工作簿的入口方法
	 * 
	 * @param path
	 * @throws Exception
	 */
	public void process(String path) throws Exception {
		OPCPackage pkg = OPCPackage.open(path);
		XSSFReader r = new XSSFReader(pkg);
		SharedStringsTable sst = r.getSharedStringsTable();

		XMLReader parser = fetchSheetParser(sst);

		Iterator<InputStream> sheets = r.getSheetsData();
		while (sheets.hasNext()) {
			curRow = 0;
			sheetIndex++;
			InputStream sheet = sheets.next();
			InputSource sheetSource = new InputSource(sheet);
			parser.parse(sheetSource);
			sheet.close();
		}
	}

	/**
	 * 该方法自动被调用,每读一行调用一次,在方法中写自己的业务逻辑即可
	 * 
	 * @param sheetIndex
	 *            工作簿序号
	 * @param curRow
	 *            处理到第几行
	 * @param rowList
	 *            当前数据行的数据集合
	 */
	public void optRow(int sheetIndex, int curRow, List<String> rowList) {
		String temp = "";
		for (String str : rowList) {
			temp += str + "_";
		}
		System.out.println(temp);
	}

	public XMLReader fetchSheetParser(SharedStringsTable sst)
			throws SAXException {
		XMLReader parser = XMLReaderFactory
				.createXMLReader("org.apache.xerces.parsers.SAXParser");
		this.sst = sst;
		parser.setContentHandler(this);
		return parser;
	}

	public void startElement(String uri, String localName, String name,
			Attributes attributes) throws SAXException {
		// c => 单元格
		if (name.equals("c")) {
			// 如果下一个元素是 SST 的索引,则将nextIsString标记为true
			String cellType = attributes.getValue("t");
			if (cellType != null && cellType.equals("s")) {
				nextIsString = true;
			} else {
				nextIsString = false;
			}
		}
		// 置空
		lastContents = "";
	}

	public void endElement(String uri, String localName, String name)
			throws SAXException {
		// 根据SST的索引值的到单元格的真正要存储的字符串
		// 这时characters()方法可能会被调用多次
		if (nextIsString) {
			try {
				int idx = Integer.parseInt(lastContents);
				lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
						.toString();
			} catch (Exception e) {

			}
		}

		// v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
		// 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
		if (name.equals("v")) {
			String value = lastContents.trim();
			value = value.equals("") ? " " : value;
			rowlist.add(curCol, value);
			curCol++;
		} else {
			// 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
			if (name.equals("row")) {
				optRow(sheetIndex, curRow, rowlist);
				rowlist.clear();
				curRow++;
				curCol = 0;
			}
		}
	}

	public void characters(char[] ch, int start, int length)
			throws SAXException {
		// 得到单元格内容的值
		lastContents += new String(ch, start, length);
	}

}

j​a​v​a​向​e​x​c​e​l​ ​写​入​海​量​数​据​内​存​溢​出​问​题​的​解​决,如下:
import java.io.FileOutputStream;
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

public class Test {

	/*** @param args */
	public static void main(String[] args) {
		if (args[0].equals("hssf")) {
			hssfTest();
		}
		if (args[0].equals("sxssf")) {
			sxssfTest();
		}
	}

	/* 不会内存溢出,最后生成一个大概40M的文件 */
	public static void sxssfTest() {
		Workbook wb = new SXSSFWorkbook(100); // keep 100 rows in memory,//
												// exceeding rows will be//
												// flushed to disk
		Sheet sh = wb.createSheet();
		int rownum = 0;
		try {
			while (true) {
				Row row = sh.createRow(rownum);
				for (int cellnum = 0; cellnum < 10; cellnum++) {
					Cell cell = row.createCell(cellnum);
					String address = new CellReference(cell).formatAsString();
					cell.setCellValue(address);
				}
				System.out.println(rownum);
				rownum++;
				if (rownum >= 1000000)
					break;
			}
			FileOutputStream out = new FileOutputStream("sxssf.xlsx");
			wb.write(out);
			out.close();
		} catch (Exception e) {
			System.out.println(ExceptionUtils.getFullStackTrace(e));
		}
	}

	/* 20000行左右就内存溢出了 */
	public static void hssfTest() {
		XSSFWorkbook wb = new XSSFWorkbook();
		Sheet sh = wb.createSheet();
		int rownum = 0;
		try {
			while (true) {
				Row row = sh.createRow(rownum);
				for (int cellnum = 0; cellnum < 10; cellnum++) {
					Cell cell = row.createCell(cellnum);
					String address = new CellReference(cell).formatAsString();
					cell.setCellValue(address);
				}
				System.out.println(rownum);
				rownum++;
				if (rownum >= 1000000)
					break;
			}
			FileOutputStream out = new FileOutputStream("hssf.xlsx");
			wb.write(out);
			out.close();
		} catch (Exception e) {
			System.out.println(ExceptionUtils.getFullStackTrace(e));
		}
	}
}


apache common之CSV文件操作

读取合并单元格的内容
package demo;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.CellRangeAddress;

public class ReadLine {

	private Log log = LogFactory.getLog(ReadLine.class);

	/**
	 * 
	 * @param excel_file_path
	 *            --excel文件的存储路径
	 * @throws IOException
	 * @throws InvalidFormatException
	 */
	public void read(String excel_file_path) throws IOException, InvalidFormatException {

		Workbook wb = WorkbookFactory.create(new java.io.File(excel_file_path));

		Sheet sheet = wb.getSheetAt(0);

		/* 定义最小读取的行数 */
		int rowStart = Math.min(0, sheet.getFirstRowNum());

		/* 定义最大读取的行数 */
		int rowEnd = Math.max(0, sheet.getLastRowNum());

		for (int rowNum = rowStart; rowNum <= rowEnd; rowNum++) {

			Row row = sheet.getRow(rowNum);

			/* 假如当前为空行,跳过当前行继续读取下一行数据 */
			if (row == null)
				continue;

			/* 定义当前行的最大读取列 */
			int lastColumn = Math.max(row.getLastCellNum(), 0);

			for (int cellnum = 0; cellnum < lastColumn; cellnum++) {
				Cell cell = row.getCell(cellnum, Row.RETURN_NULL_AND_BLANK);

				boolean is = isMergedRegion(sheet, rowNum, cellnum);// 判断是合并单元格否
				if (true == is) {
					log.info(getMergedRegionValue(sheet, rowNum, cellnum));
				} else {
					log.info(getCellValue(cell));
				}

			}
		}
	}

	/**
	 * 判断指定的单元格是否是合并单元格
	 * 
	 * @param sheet
	 * @param row
	 * @param column
	 * @return
	 */
	public boolean isMergedRegion(Sheet sheet, int row, int column) {
		int sheetMergeCount = sheet.getNumMergedRegions();

		for (int i = 0; i < sheetMergeCount; i++) {
			CellRangeAddress ca = sheet.getMergedRegion(i);
			int firstColumn = ca.getFirstColumn();
			int lastColumn = ca.getLastColumn();
			int firstRow = ca.getFirstRow();
			int lastRow = ca.getLastRow();

			if (row >= firstRow && row <= lastRow) {
				if (column >= firstColumn && column <= lastColumn) {
					return true;
				}
			}
		}

		return false;
	}

	/**
	 * 获取单元格的值
	 * 
	 * @param cell
	 * @return
	 */
	public String getCellValue(Cell cell) {

		if (cell == null)
			return "";

		if (cell.getCellType() == Cell.CELL_TYPE_STRING) {

			return cell.getStringCellValue();

		} else if (cell.getCellType() == Cell.CELL_TYPE_BOOLEAN) {

			return String.valueOf(cell.getBooleanCellValue());

		} else if (cell.getCellType() == Cell.CELL_TYPE_FORMULA) {

			return cell.getCellFormula();

		} else if (cell.getCellType() == Cell.CELL_TYPE_NUMERIC) {

			return String.valueOf(cell.getNumericCellValue());

		}

		return "";
	}

	/**
	 * 获取合并单元格的值
	 * 
	 * @param sheet
	 * @param row
	 * @param column
	 * @return
	 */
	public String getMergedRegionValue(Sheet sheet, int row, int column) {
		int sheetMergeCount = sheet.getNumMergedRegions();

		for (int i = 0; i < sheetMergeCount; i++) {
			CellRangeAddress ca = sheet.getMergedRegion(i);
			int firstColumn = ca.getFirstColumn();
			int lastColumn = ca.getLastColumn();
			int firstRow = ca.getFirstRow();
			int lastRow = ca.getLastRow();

			if (row >= firstRow && row <= lastRow) {

				if (column >= firstColumn && column <= lastColumn) {
					Row fRow = sheet.getRow(firstRow);
					Cell fCell = fRow.getCell(firstColumn);

					return getCellValue(fCell);
				}
			}
		}

		return null;
	}

	public static void main(String[] args) {
		try {
			new ReadLine().read("E:\\a.xlsx");
		} catch (IOException e) {
			e.printStackTrace();
		} catch (InvalidFormatException e) {
			e.printStackTrace();
		}
	}
}


添加超链接示例代码:
这个是设置一个超链接弹出email地址, 其他类似
import java.io.FileOutputStream;
      
import org.apache.poi.ss.usermodel;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.ss.usermodel.IndexedColors;

/**
* @author lance
*/
public class HyperlinkExample {


  public static void main(String[]args) throws Exception{
	  Workbook wb = new XSSFWorkbook(); //or new HSSFWorkbook();
	  CreationHelper createHelper = wb.getCreationHelper();

	  CellStyle hlink_style = wb.createCellStyle();
	  Font hlink_font = wb.createFont();
	  hlink_font.setUnderline(Font.U_SINGLE);
	  hlink_font.setColor(IndexedColors.BLUE.getIndex());
	  hlink_style.setFont(hlink_font);

	  Cell cell;
	  Sheet sheet = wb.createSheet("Hyperlinks");
	  //URL
	  cell = sheet.createRow(0).createCell((short)0);
	  cell.setCellValue("URL Link");

	  Hyperlink link = createHelper.createHyperlink(Hyperlink.LINK_URL);
	  link.setAddress("http://poi.apache.org/");
	  cell.setHyperlink(link);
	  cell.setCellStyle(hlink_style);

	  //link to a file in the current directory
	  cell = sheet.createRow(1).createCell((short)0);
	  cell.setCellValue("File Link");
	  link = createHelper.createHyperlink(Hyperlink.LINK_FILE);
	  link.setAddress("link1.xls");
	  cell.setHyperlink(link);
	  cell.setCellStyle(hlink_style);

	  //e-mail link
	  cell = sheet.createRow(2).createCell((short)0);
	  cell.setCellValue("Email Link");
	  link = createHelper.createHyperlink(Hyperlink.LINK_EMAIL);
	  //设置路径
	  link.setAddress("mailto:poi@apache.org?subject=Hyperlinks");
	  cell.setHyperlink(link);
	  cell.setCellStyle(hlink_style);

	  //create a target sheet and cell
	  Sheet sheet2 = wb.createSheet("Target Sheet");
	  sheet2.createRow(0).createCell((short)0).setCellValue("Target Cell");

	  cell = sheet.createRow(3).createCell((short)0);
	  cell.setCellValue("Worksheet Link");
	  Hyperlink link2 = createHelper.createHyperlink(Hyperlink.LINK_DOCUMENT);
	  link2.setAddress("'Target Sheet'!A1");
	  cell.setHyperlink(link2);
	  cell.setCellStyle(hlink_style);

	  FileOutputStream out = new FileOutputStream("hyperinks.xlsx");
	  wb.write(out);
	  out.close();

  }
}
分享到:
评论

相关推荐

    c#代码读写excel数据demo

    // Iterate through rows and cells for (int rowIndex = 0; rowIndex ; rowIndex++) { var row = sheet.GetRow(rowIndex); if (row == null) continue; for (int cellIndex = 0; cellIndex ; cellIndex++) { ...

    ibatis中iterate的例子

    举个例子,假设我们有一个用户ID的列表,我们需要查询这些ID对应的所有用户信息,可以这样配置iBatis的映射文件: ```xml SELECT * FROM users WHERE id IN &lt;iterate prefix="(" property="ids" open="," close=...

    How to Iterate Over a Map in Java

    在Java编程中,Map是一种非常重要的数据结构,它存储键值对,允许通过键来高效地查找对应的值。本文将详细讲解如何在Java中遍历Map,包括不同类型的Map迭代方式,以及涉及到的相关知识点。 首先,Java中的Map接口...

    iterate_excel_links.ahk

    遍历当前打开的Excel表格,逐一打开每个单元格的URL

    struts 标签 logic:iterate使用 logic:iterate

    在`struts`的标签库中,`logic:iterate`是一个非常重要的标签,用于遍历集合对象,如`List`、`Map`或数组,并对每个元素执行循环操作。 `logic:iterate`标签的主要属性包括: 1. `id`: 用于给循环中的当前元素创建...

    ibatis标签

    在源码方面,iBATIS的实现中,`&lt;iterate&gt;`标签的解析和执行是通过SqlMapConfig.xml配置文件加载后,由SqlMapClientBuilder解析,并由Executor执行器来处理。在处理动态SQL时,iBATIS会将`&lt;iterate&gt;`标签转换成对应的...

    Node的Excel模块ExcelJS.zip

    // iterate over all current cells in this column including empty cells dobCol.eachCell({ includeEmpty: true }, function(cell, rowNumber) {  // ... }); 标签:ExcelJS

    logic:iterate 是什么,怎么用

    ### logic:iterate 标签详解 #### 一、概述 `logic:iterate` 是 Struts 标签库中的一个重要组成部分,主要用于在 JSP 页面中循环遍历集合对象(如 List、Array 或 Map 等)。它能够有效地帮助开发者在前端展示动态...

    struts-logic iterate标签学习指南

    Struts-Logic Iterate标签是Apache Struts框架中的一个重要组件,用于在JSP页面中迭代集合对象,如数组、列表或Map。这个标签提供了一种简洁的方式来遍历数据,并且可以与Struts的其他标签(如`bean:write`)配合...

    jsp脚本和标签:实现循环和分支逻辑

    jsp脚本和&lt;logic:iterate&gt;标签:实现循环和分支逻辑 jsp脚本和&lt;logic:iterate&gt;标签:实现循环和分支逻辑 jsp脚本和&lt;logic:iterate&gt;标签:实现循环和分支逻辑

    ExcelUtils-2.00.zip

    not edit any your source, just ExcelUtils parses your excel template and fills values to export your report. It is based POI project and beanutils project. It uses excel and template language's ...

    day37 04-Hibernate二级缓存:list和iterate方法比较

    对于`list`,如果查询结果已经在二级缓存中,那么可以直接从缓存读取,避免了数据库查询。而对于`iterate`,由于它是逐条加载,二级缓存主要作用于单个实体的缓存,而不是整个查询结果集。 要合理利用二级缓存,...

    logic:iterate产生问题的图

    logic:iterate 产生问题的图

    jxls导出excel所用所有jar包

    `jxls`是一个非常实用的库,它允许我们利用Java模板技术来方便地生成Excel文件。`jxls`通过简化Excel操作,使得在程序中动态填充数据到Excel模板变得简单高效。本篇文章将详细介绍`jxls`以及如何使用`jxls`导出Excel...

    ShapefileReader:Swift 中的 shapefile 阅读器

    形状文件读取器Swift 中的 ...ShapefileReader ( path : " g1g15.shp " ) else { assertionFailure () }// iterate over both shapes and recordsfor (shape, record) in sr. shapeAndRecordGenerator () { // record

    Mastering Linux Shell Scripting 2nd Edition

    see how to write for loops, how to iterate over simple values, and how to iterate over directory content. Also, you will learn how to write nested loops. Along with this, you will write while and ...

    Modelling Organs, Tissues, Cells and Devices COMSOL Multiphysics

    Computational modeling plays an increasingly important role in biological and medical research, as well as in the medical device industry....system behavior and iterate the device design.

    python-fitparse:用于解析ANTGarmin .FIT文件的Python库

    python-fitparse 这是一个用于解析ANT / Garmin .FIT文件的Python库。...# Iterate over all messages of type "record" # (other types include "device_info", "file_creator", "event", etc) for record in fit

    sor_iterate.rar_SOR_fortran code

    本压缩包"Sor_iterate.rar"包含了一个用Fortran编写的SOR方法实现,文件名为"sor_iterate.f90"。 Fortran是一种广泛用于科学计算的编程语言,以其高效和简洁的语法深受科研人员喜爱。在"sor_iterate.f90"中,我们...

    struts1 文件上传 文件下载

    这通常涉及读取`ActionForm`中的`File`对象和`String`对象(代表文件名),然后将文件写入服务器磁盘。 5. **异常处理**:文件上传可能遇到各种问题,如大小限制、文件类型限制等,因此需要进行错误检查和异常处理...

Global site tag (gtag.js) - Google Analytics