//整体excel加载 问题出现在第一步文件加载,经测试5M文件的加载基本已经到我电脑4G内存的极限了, //当然如果电脑配置更好的话可以优化jvm,应该会再大一些
import com.yeepay.prop.common.utils.excel.HxlsReader; import com.yeepay.prop.common.utils.excel.IRowReader; import com.yeepay.prop.common.utils.excel.XxlsReader; import com.yeepay.prop.common.utils.exception.SysOperCustomException; import org.apache.poi.hssf.usermodel.*; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import java.io.File; import java.io.FileInputStream; import java.text.SimpleDateFormat; import java.util.*; public class POIUtil { /** * 导入Excel文件 * 内容以List<Map<String K,String V>>的方式存放 * @param excelFile : Excel文件对象 * @param strKeys : Map的Key列表,Value为相应的sheet一行中各列的值 * @return */ public static List<Map<String,String>> importExcelToMap(File excelFile, String strKeys) { String[] strKey = strKeys.split(","); List<Map<String,String>> listMap = new ArrayList<Map<String,String>>(); String fileName = excelFile.getPath(); int i = 0; try { Workbook workbook = null; String fileType = fileName.substring(fileName.lastIndexOf(".")+1).toLowerCase(); try { if (fileType.equals("xls")) { <span style="color:#FF0000;">workbook = new XSSFWorkbook(new FileInputStream(excelFile))</span>;//<span style="font-size:18px;color:#FF0000;">如果文件过大 一定会在里卡死,造成内存溢出</span> } else if (fileType.equals("xlsx")) { <span style="color:#FF0000;"> workbook = new HSSFWorkbook(new FileInputStream(excelFile));</span> } } catch (Exception e) { throw new SysOperCustomException("导入文件格式错误: "+e.getMessage()); } Sheet sheet = workbook.getSheetAt(0); while (true) { Row row = sheet.getRow(i); if (row == null) break; Map<String,String> map = new HashMap<String,String>(); Boolean flag = false; for(int keyIndex = 0; keyIndex < strKey.length; keyIndex++){ //System.out.println(keyIndex+" "+row.getCell(keyIndex)); Cell cell = row.getCell(keyIndex); //null 不可以 设置格式为cell_type_string if(null != row.getCell(keyIndex)) { if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC && HSSFDateUtil.isCellDateFormatted(cell)) { // 数值、日期类型 double d = cell.getNumericCellValue(); // 日期类型 Date date = HSSFDateUtil.getJavaDate(d); cell.setCellType(HSSFCell.CELL_TYPE_STRING); cell.setCellValue(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") .format(date)); } else { cell.setCellType(HSSFCell.CELL_TYPE_STRING); } } String cellValue = ""; if(null != cell){ cellValue = row.getCell(keyIndex).getStringCellValue(); } //如果行数据存在不为空 或 null的数据,则视为有效数据 if(null != cellValue && !cellValue.isEmpty()) flag = true; map.put(strKey[keyIndex], cellValue.trim()); } //只添加有效数据 if(flag) listMap.add(map); i++; } } catch (Exception e) { e.printStackTrace(); System.out.println("导入中断,错误位置:第"+ i +"行数据!"); } return listMap; } }
import java.io.File; import java.util.Date; import java.util.List; import java.util.Map; public class Test { @org.junit.Test public void testImport() { File image = new File("D:\\test.xls"); String titelStr = "bankCustomerNo,cardNumber,tradeAmount,tradeTime,requestId";//实体属性 System.out.println(new Date()); List<Map<String,String>> list = POIUtil.importExcelToMap(image, titelStr); //模板的判定 //在这里处理业务逻辑与数据处理 实体的创建于保存 for(Map<String, String> map : list) { User user = new User(); user.setName(map.get("name")); . . . } } }
在我以前所有的excel数据导入都是通过poi的简单应用,直接加载整个excel进行数据返回统计,第一步直接造成了不可能对于大文件、大数据的的处理。因此通过参考http://blog.sina.com.cn/s/blog_628d4dd101017quk.html并在以上的基础上编写了如下工具类,之所以工具类不支持大数据量的处理是因为其中方法optRows编写代码问题,该方法为每一行数据的存储都会调用该方法,当数据量达到10万条数据的时候就会调用十万次,list中存在十万个map数据,map中又存在十万个key、10万个value,并且这些对象会长期持有不会释放掉,因此会造成gc回收出现文件,最后超出内存 异常爆出out of memory
之所以保留了此方法,是因为数据一起返回操作会更简便,业务书写会更方便(相对于大excel处理)
import com.yeepay.prop.common.utils.excel.HxlsReader; import com.yeepay.prop.common.utils.excel.IRowReader; import com.yeepay.prop.common.utils.excel.XxlsReader; import com.yeepay.prop.common.utils.exception.SysOperCustomException; import org.apache.poi.hssf.usermodel.*; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import java.io.File; import java.io.FileInputStream; import java.text.SimpleDateFormat; import java.util.*; public class POIUtil { /** * 导入Excel文件 * 内容以List<Map<String K,String V>>的方式存放 * @param excelFile : Excel文件对象 * @param strKeys : Map的Key列表,Value为相应的sheet一行中各列的值 * @return */ public static List<Map<String,String>> importExcelToMap(File excelFile, String strKeys) { String[] strKey = strKeys.split(","); List<Map<String,String>> listMap = new ArrayList<Map<String,String>>(); String fileName = excelFile.getPath(); int i = 0; try { Workbook workbook = null; String fileType = fileName.substring(fileName.lastIndexOf(".")+1).toLowerCase(); try { if (fileType.equals("xls")) { workbook = new XSSFWorkbook(new FileInputStream(excelFile)); } else if (fileType.equals("xlsx")) { workbook = new HSSFWorkbook(new FileInputStream(excelFile)); } } catch (Exception e) { throw new SysOperCustomException("导入文件格式错误: "+e.getMessage()); } Sheet sheet = workbook.getSheetAt(0); while (true) { Row row = sheet.getRow(i); if (row == null) break; Map<String,String> map = new HashMap<String,String>(); Boolean flag = false; for(int keyIndex = 0; keyIndex < strKey.length; keyIndex++){ //System.out.println(keyIndex+" "+row.getCell(keyIndex)); Cell cell = row.getCell(keyIndex); //null 不可以 设置格式为cell_type_string if(null != row.getCell(keyIndex)) { if(cell.getCellType() == Cell.CELL_TYPE_NUMERIC && HSSFDateUtil.isCellDateFormatted(cell)) { // 数值、日期类型 double d = cell.getNumericCellValue(); // 日期类型 Date date = HSSFDateUtil.getJavaDate(d); cell.setCellType(HSSFCell.CELL_TYPE_STRING); cell.setCellValue(new SimpleDateFormat("yyyy-MM-dd HH:mm:ss") .format(date)); } else { cell.setCellType(HSSFCell.CELL_TYPE_STRING); } } String cellValue = ""; if(null != cell){ cellValue = row.getCell(keyIndex).getStringCellValue(); } //如果行数据存在不为空 或 null的数据,则视为有效数据 if(null != cellValue && !cellValue.isEmpty()) flag = true; map.put(strKey[keyIndex], cellValue.trim()); } //只添加有效数据 if(flag) listMap.add(map); i++; } } catch (Exception e) { e.printStackTrace(); System.out.println("导入中断,错误位置:第"+ i +"行数据!"); } return listMap; } /** * 导入Excel文件 暂时可以读取10万以内数据 * 内容以List<Map<String K,String V>>的方式存放 * @param excelFile : Excel文件对象 * @param strKeys : Map的Key列表,Value为相应的sheet一行中各列的值 * @return */ public static List<Map<String,String>> importBigExcelToMap(File excelFile, String strKeys) { String filename = excelFile.getPath(); String fileType = filename.substring(filename.lastIndexOf(".")+1).toLowerCase(); List<Map<String, String>> list = new ArrayList<Map<String, String>>(); try { if (fileType.equals("xls")) { list = <strong>HxlsUtil.importExcelToMap</strong>( excelFile, strKeys); } else if (fileType.equals("xlsx")) { list = <strong>XxlsUtil.importExcelToMap</strong>( excelFile,strKeys); } } catch (Exception e) { System.out.println("导入文件格式错误: "+e.getMessage()); } return list; } }
package com.yeepay.prop.common.utils; import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder.SheetRecordCollectingListener; import org.apache.poi.hssf.eventusermodel.*; import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; import org.apache.poi.hssf.eventusermodel.dummyrecord.MissingCellDummyRecord; import org.apache.poi.hssf.model.HSSFFormulaParser; import org.apache.poi.hssf.record.*; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.PrintStream; import java.sql.SQLException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * excel 2003 文件处理 */ public class HxlsUtil implements HSSFListener { private int minColumns; private POIFSFileSystem fs; private PrintStream output; private int lastRowNumber; private int lastColumnNumber; /** Should we output the formula, or the value it has? */ private boolean outputFormulaValues = true; /** For parsing Formulas */ private SheetRecordCollectingListener workbookBuildingListener; private HSSFWorkbook stubWorkbook; // Records we pick up as we process private SSTRecord sstRecord; private FormatTrackingHSSFListener formatListener; /** So we known which sheet we're on */ private int sheetIndex = -1; private BoundSheetRecord[] orderedBSRs; private ArrayList boundSheetRecords = new ArrayList(); // For handling formulas with string results private int nextRow; private int nextColumn; private boolean outputNextStringRecord; private int curRow; private List<String> rowlist; @SuppressWarnings( "unused") private String sheetName; private String title; private List<Map<String, String>> mapList; private HxlsUtil(POIFSFileSystem fs, String title) throws SQLException { this.fs = fs; this.output = System.out; this.minColumns = -1; this.curRow = 0; this.rowlist = new ArrayList<String>(); this.title = title; this.mapList = new ArrayList<Map<String, String>>(); } private HxlsUtil(String filename, String title) throws IOException, SQLException { this(new POIFSFileSystem(new FileInputStream(filename)), title); } <strong>public Map<String, String> optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException { <span style="color:#FF0000;">String[] titleStr = title.split(",");</span> <span style="color:#FF0000;">int col</span> = rowlist.size(); <span style="color:#FF0000;">Map<String, String> map = new HashMap<String, String>();</span> for(int i = 0; i < titleStr.length; i++) { if(col >= titleStr.length) map.put(titleStr[i], rowlist.get(i).trim()); else map.put(titleStr[i], ""); } return map.size() > 0 ? map : null; }</strong> /** * 遍历 excel 文件 */ public void process() throws IOException { MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener( this); formatListener = new FormatTrackingHSSFListener(listener); HSSFEventFactory factory = new HSSFEventFactory(); HSSFRequest request = new HSSFRequest(); if (outputFormulaValues) { request.addListenerForAllRecords(formatListener); } else { workbookBuildingListener = new SheetRecordCollectingListener( formatListener); request.addListenerForAllRecords(workbookBuildingListener); } factory.processWorkbookEvents(request, fs); } /** * HSSFListener 监听方法,处理 Record */ @SuppressWarnings("unchecked") public void processRecord(Record record) { int thisRow = -1; int thisColumn = -1; String thisStr = null; String value = null; switch (record.getSid()) { case BoundSheetRecord.sid: boundSheetRecords.add(record); break; case BOFRecord.sid: BOFRecord br = (BOFRecord) record; if (br.getType() == BOFRecord.TYPE_WORKSHEET) { // 如果有需要,则建立子工作薄 if (workbookBuildingListener != null && stubWorkbook == null) { stubWorkbook = workbookBuildingListener .getStubHSSFWorkbook(); } sheetIndex++; if (orderedBSRs == null) { orderedBSRs = BoundSheetRecord .orderByBofPosition(boundSheetRecords); } sheetName = orderedBSRs[sheetIndex].getSheetname(); } break; case SSTRecord.sid: sstRecord = (SSTRecord) record; break; case BlankRecord.sid: BlankRecord brec = (BlankRecord) record; thisRow = brec.getRow(); thisColumn = brec.getColumn(); thisStr = ""; rowlist.add(thisColumn, thisStr); break; case BoolErrRecord.sid: //单元格为布尔类型 BoolErrRecord berec = (BoolErrRecord) record; thisRow = berec.getRow(); thisColumn = berec.getColumn(); thisStr = berec.getBooleanValue()+""; rowlist.add(thisColumn, thisStr); break; case FormulaRecord.sid: //单元格为公式类型 FormulaRecord frec = (FormulaRecord) record; thisRow = frec.getRow(); thisColumn = frec.getColumn(); if (outputFormulaValues) { if (Double.isNaN(frec.getValue())) { // Formula result is a string // This is stored in the next record outputNextStringRecord = true; nextRow = frec.getRow(); nextColumn = frec.getColumn(); } else { thisStr = formatListener.formatNumberDateCell(frec); } } else { thisStr = '"' + HSSFFormulaParser.toFormulaString(stubWorkbook, frec.getParsedExpression()) + '"'; } rowlist.add(thisColumn,thisStr); break; case StringRecord.sid://单元格中公式的字符串 if (outputNextStringRecord) { // String for formula StringRecord srec = (StringRecord) record; thisStr = srec.getString(); thisRow = nextRow; thisColumn = nextColumn; outputNextStringRecord = false; } break; case LabelRecord.sid: LabelRecord lrec = (LabelRecord) record; curRow = thisRow = lrec.getRow(); thisColumn = lrec.getColumn(); value = lrec.getValue().trim(); value = value.equals("")?" ":value; this.rowlist.add(thisColumn, value); break; case LabelSSTRecord.sid: //单元格为字符串类型 LabelSSTRecord lsrec = (LabelSSTRecord) record; curRow = thisRow = lsrec.getRow(); thisColumn = lsrec.getColumn(); if (sstRecord == null) { rowlist.add(thisColumn, " "); } else { value = sstRecord .getString(lsrec.getSSTIndex()).toString().trim(); value = value.equals("")?" ":value; rowlist.add(thisColumn,value); } break; case NumberRecord.sid: //单元格为数字类型 NumberRecord numrec = (NumberRecord) record; curRow = thisRow = numrec.getRow(); thisColumn = numrec.getColumn(); value = formatListener.formatNumberDateCell(numrec).trim(); value = value.equals("")?" ":value; // 向容器加入列值 rowlist.add(thisColumn, value); break; default: break; } // 遇到新行的操作 if (thisRow != -1 && thisRow != lastRowNumber) { lastColumnNumber = -1; } // 空值的操作 if (record instanceof MissingCellDummyRecord) { MissingCellDummyRecord mc = (MissingCellDummyRecord) record; curRow = thisRow = mc.getRow(); thisColumn = mc.getColumn(); rowlist.add(thisColumn," "); } // 更新行和列的值 if (thisRow > -1) lastRowNumber = thisRow; if (thisColumn > -1) lastColumnNumber = thisColumn; // 行结束时的操作 if (record instanceof LastCellOfRowDummyRecord) { if (minColumns > 0) { // 列值重新置空 if (lastColumnNumber == -1) { lastColumnNumber = 0; } } // 行结束时, 调用 optRows() 方法 lastColumnNumber = -1; try { mapList.add(optRows(sheetIndex,curRow, rowlist)); } catch (SQLException e) { e.printStackTrace(); } rowlist.clear(); } } /** * 导入Excel文件 * 内容以List<Map<String K,String V>>的方式存放 * @param excelFile : Excel文件对象 * @param title : Map的Key列表,Value为相应的sheet一行中各列的值 * @return */ public static List<Map<String,String>> importExcelToMap(File excelFile, String title) throws IOException, SQLException { HxlsUtil hxls = new HxlsUtil(excelFile.getPath(), title); hxls.process(); return hxls.mapList; } }
import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.XMLReaderFactory; import java.io.File; import java.io.InputStream; import java.sql.SQLException; import java.util.*; /** * Excel 2007采用SAX解析文件,注意JDK版本1.5以上 */ public class XxlsUtil extends DefaultHandler { private SharedStringsTable sst; private String lastContents; private boolean nextIsString; private int sheetIndex = -1; private List<String> rowlist = new ArrayList<String>(); private int curRow = 0; private int curCol = 0; private String title; private List<Map<String, String>> mapList; private XxlsUtil(String fileName, String title) throws Exception { this.title = title; this.mapList = new ArrayList<Map<String, String>>(); processOneSheet(fileName, 1); } public Map<String, String> optRows(int sheetIndex,int curRow, List<String> rowlist) throws SQLException { String[] titleStr = title.split(","); int col = rowlist.size(); Map<String, String> map = new HashMap<String, String>(); for(int i = 0; i < titleStr.length; i++) { if(col >= titleStr.length) map.put(titleStr[i], rowlist.get(i).trim()); else map.put(titleStr[i], ""); // System.out.print(rowlist.get(i) + " "); } return map.size() > 0 ? map : null; } //只遍历一个sheet,其中sheetId为要遍历的sheet索引,从1开始,1-3 public void processOneSheet(String filename,int sheetId) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); // rId2 found by processing the Workbook // 根据 rId# 或 rSheet# 查找sheet InputStream sheet2 = r.getSheet("rId"+sheetId); sheetIndex++; InputSource sheetSource = new InputSource(sheet2); parser.parse(sheetSource); sheet2.close(); } /** * 遍历 excel 文件 */ public void process(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); XMLReader parser = fetchSheetParser(sst); Iterator<InputStream> sheets = r.getSheetsData(); while (sheets.hasNext()) { curRow = 0; sheetIndex++; InputStream sheet = sheets.next(); InputSource sheetSource = new InputSource(sheet); parser.parse(sheetSource); sheet.close(); } } public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException { XMLReader parser = XMLReaderFactory.createXMLReader(); // .createXMLReader("org.apache.xerces.parsers.SAXParser"); this.sst = sst; parser.setContentHandler(this); return parser; } public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // c => 单元格 if (name.equals("c")) { // 如果下一个元素是 SST 的索引,则将nextIsString标记为true String cellType = attributes.getValue("t"); if (cellType != null && cellType.equals("s")) { nextIsString = true; } else { nextIsString = false; } } // 置空 lastContents = ""; } public void endElement(String uri, String localName, String name) throws SAXException { // 根据SST的索引值的到单元格的真正要存储的字符串 // 这时characters()方法可能会被调用多次 if (nextIsString) { try { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)) .toString(); } catch (Exception e) { } } // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引 // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符 if (name.equals("v")) { String value = lastContents.trim(); value = value.equals("")?" ":value; rowlist.add(curCol, value); curCol++; }else { //如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法 if (name.equals("row")) { try { mapList.add(optRows(sheetIndex, curRow, rowlist)); } catch (SQLException e) { e.printStackTrace(); } rowlist.clear(); curRow++; curCol = 0; } } } public void characters(char[] ch, int start, int length) throws SAXException { //得到单元格内容的值 lastContents += new String(ch, start, length); } /** * 导入Excel文件 * 内容以List<Map<String K,String V>>的方式存放 * @param excelFile : Excel文件对象 * @param title : Map的Key列表,Value为相应的sheet一行中各列的值 * @return */ public static List<Map<String,String>> importExcelToMap(File excelFile, String title){ try { XxlsUtil xxls = new XxlsUtil(excelFile.getPath(), title); return xxls.mapList; }catch (Exception e) { e.printStackTrace(); } return null; } }
import com.yeepay.prop.common.utils.POIUtil; import java.io.File; import java.util.Date; import java.util.List; import java.util.Map; /** * Description * PackageName: PACKAGE_NAME * * @author hua.jin * @since 14-12-22 上午9:26 */ public class Test { @org.junit.Test public void testImport() { File image = new File("D:\\test.xls"); String titelStr = "name,age,gender"; System.out.println(new Date()); List<Map<String,String>> list = POIUtil.<strong>importBigExcelToMap</strong>(image, titelStr); <pre name="code" class="java"> //模板的判定 //在这里处理业务逻辑与数据处理 实体的创建于保存 for(Map<String, String> map : list) { User user = new User(); user.setName(map.get("name")); . . . }}}
这个就没什么好说的了,该方法 主要参考了http://blog.csdn.net/goodkuang2012/article/details/7350985,将行数据进行了接口处理,因为一行行数据进行处理也就缺失了整体文件的把控,在异常处理这块 该好好考虑一下了,在此基础上的改动没有太大,也就不再叙述了,有得有失吧
经测试大文件处理80M没有问题
2003与2007excel数据导入小(5M)中(10M)大(80M以上)
原文:http://blog.csdn.net/kingo0/article/details/43191407