Skip to content

read and write excel

yangyp8110 edited this page Aug 29, 2018 · 4 revisions

pom依赖

        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.15</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml-schemas</artifactId>
            <version>3.15</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.15</version>
        </dependency>

        <!--大量数据读取-->
        <dependency>
            <groupId>xerces</groupId>
            <artifactId>xercesImpl</artifactId>
            <version>2.11.0</version>
        </dependency>

读取/写入公用类

    //2003- 版本的excel
    private final static String excel2003L ="xls";
    //2007+ 版本的excel
    private final static String excel2007U ="xlsx";
    private final static SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
    /**
     * 生成excel文件
     * @param fileFullName 保存文件的地址(eg C:\Users\用户名\Desktop\data.xls)
     * @param titles 文件头
     * @param values 需要写入的数据 Map<rowNo, <colNo, String>>
     * @return
     * @throws IOException
     */
    public static void saveExcel(String fileFullName, List<String> titles, Map<Integer, Map<Integer, String>> values) throws IOException {
        @Cleanup
        Workbook workbook = generateWorkbook(StringUtils.getFilenameExtension(fileFullName), titles, values);
        File file = new File(fileFullName);
        @Cleanup
        FileOutputStream fileOutputStream = new FileOutputStream(file);
        workbook.write(fileOutputStream);
        fileOutputStream.flush();
    }

    /**
     * 指定header名和map格式数据,生成Workbook
     * @param fileExtension
     * @param titles
     * @param values
     * @return
     * @throws IOException
     */
    public static Workbook generateWorkbook(String fileExtension, List<String> titles, Map<Integer, Map<Integer, String>> values) throws IOException {
        Workbook  workbook = createWorkBook(fileExtension);
        // 生成一个sheet表格
        Sheet sheet = workbook.createSheet();
        // 设置表格默认列宽度为15个字节
        sheet.setDefaultColumnWidth((short) 15);
        // 创建标题行
        Row row = sheet.createRow(0);
        for (int i = 0; i < titles.size(); i++) {
            row.createCell(i).setCellValue(titles.get(i));
        }
        //标题行数
        int headRow = 1;
        //写入正文
        for (Integer key : values.keySet()) {
            row = sheet.createRow(headRow + key);
            Map<Integer, String> keySet = values.get(key);
            for (int j = 0; j < keySet.size(); j++) {
                Cell cell = row.createCell(j);
                cell.setCellValue(keySet.get(j));
            }
        }

        return workbook;
    }


    /**
     * 读取Excel为Json字符串,客户端反序列化即可得到List
     * @param inputStream
     * @param filenameExtension
     * @param headerMapper 【列序号】和【Json的Key名】组成Key-Value
     * @return
     * @throws Exception
     */
    public static String getJson(InputStream inputStream, String filenameExtension, Map<Integer, String> headerMapper) throws Exception {
        @Cleanup
        Workbook wb = createWorkBook(filenameExtension ,inputStream);

        Sheet sheetAt = wb.getSheetAt(0);
        int lastRowNum = sheetAt.getLastRowNum();

        StringBuilder sb = new StringBuilder();
        sb.append("[");
        for (int i = 0; i <= lastRowNum; i++) {
            Row row = sheetAt.getRow(i);
            if (i == 0) {
                continue;
            }
            sb.append("{");
            int physicalNumberOfCells = row.getPhysicalNumberOfCells();
            for (int j = 0; j < physicalNumberOfCells; j++) {
                Object value = getCellValue(row.getCell(j));
                sb.append("\"").append(headerMapper.get(j)).append("\":\"").append(value).append("\"");
                if (j < physicalNumberOfCells - 1) {
                    sb.append(",");
                }
            }
            sb.append("}");
            if (i < lastRowNum) {
                sb.append(",");
            }
        }
        sb.append("]");
        return sb.toString();
    }

    /**
     * 根据inputStream创建一个Workbook
     * 10W一下读取
     * @param fileType
     * @param inputStream
     * @return
     * @throws Exception
     */
    private static Workbook createWorkBook(String fileType, InputStream inputStream) throws Exception {
        Workbook wb;
        if (excel2003L.equals(fileType)) {
            //2003-
            wb = new HSSFWorkbook(inputStream);
        } else if (excel2007U.equals(fileType)) {
            //2007+
            wb = new XSSFWorkbook(inputStream);
        } else {
            throw new Exception("解析的文件格式有误!");
        }
        return wb;
    }

    /**
     * 创建一个Workbook
     * @param fileType
     * @return
     * @throws Exception
     */
    private static Workbook createWorkBook(String fileType) throws IOException {
        Workbook wb;
        if (excel2003L.equals(fileType)) {
            //2003-
            wb = new HSSFWorkbook();
        } else if (excel2007U.equals(fileType)) {
            //2007+  内存留存数据
            wb = new SXSSFWorkbook(10000);
        } else {
            throw new IOException("解析的文件格式有误!");
        }
        return wb;
    }

    /**
     * 读取Excel,返回Map
     * @param inputStream
     * @param filenameExtension
     * @param columnNos
     * @return
     * @throws Exception
     */
    public static Map<Integer,Map<Integer,Object>> getMapFromWorkBook(InputStream inputStream, String filenameExtension, List<Integer> columnNos) throws Exception {
        //Map<rowNo, Map<columnNo, columnValue>>
        Map<Integer, Map<Integer, Object>> sheetMap = new HashMap<>();

        @Cleanup
        Workbook wb = createWorkBook(filenameExtension ,inputStream);
        Sheet sheetAt = wb.getSheetAt(0);
        int lastRowNum = sheetAt.getLastRowNum();
        for (int i = 0; i <= lastRowNum; i++) {
            Row row = sheetAt.getRow(i);
            if(i == 0) {
                //skip header
                continue;
            }

            Map<Integer, Object> columnValMap = new HashMap<>();
            //按列展开
            for (Integer index : columnNos) {
                Object value = getCellValue(row.getCell(index));
                columnValMap.put(index, value);
            }

            sheetMap.put(i,columnValMap);
        }
        return sheetMap;
    }

    /**
     * 获取cell值
     * @param cell
     * @return
     */
    public static Object getCellValue(Cell cell) {
        Object value = "";
        switch (cell.getCellType()) {
            case Cell.CELL_TYPE_BLANK:
                value = "";
                break;
            case Cell.CELL_TYPE_BOOLEAN:
                value = cell.getBooleanCellValue();
                break;
            case Cell.CELL_TYPE_ERROR:
            case Cell.CELL_TYPE_FORMULA:
                break;
            case Cell.CELL_TYPE_NUMERIC:
                if (HSSFDateUtil.isCellDateFormatted(cell)) {
                    value = sdf.format(HSSFDateUtil.getJavaDate(cell.getNumericCellValue()));
                    break;
                } else {
                    value = new DecimalFormat("0").format(cell.getNumericCellValue());
                }
                break;
            case Cell.CELL_TYPE_STRING:
                value = cell.getStringCellValue();
                break;
        }
        return value;
    }
}

大量数据读取(转化为SAX方式读取)

package com.logback.demo.util;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

import java.io.IOException;
import java.io.InputStream;
import java.util.*;

/**
 * POI读取excel有两种模式,一种是用户模式,一种是事件驱动模式
 * 采用SAX事件驱动模式解决XLSX文件,可以有效解决用户模式内存溢出的问题,
 * 该模式是POI官方推荐的读取大数据的模式,在用户模式下,数据量较大,Sheet较多,或者是有很多无用的空行的情况下,容易出现内存溢出
 * <p>
 * 用于解决.xlsx2007版本大数据量问题
 **/
public class SheetHandler extends DefaultHandler {
    private SharedStringsTable sst;
    private String lastContents;
    private boolean nextIsString;

    private int sheetIndex = -1;
    private List<String> rowList = new ArrayList<>();
    public Map<Integer, Map<Integer, String>> dataMap = new HashMap<>();
    private int curRow = 0;
    private int curCol = 0;

    /**
     * 读取第一个sheet
     *
     * @param path
     * @throws Exception
     */
    public void readFirstSheet(String path) throws OpenXML4JException, IOException, SAXException {
        OPCPackage pkg = OPCPackage.open(path);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);

        Iterator<InputStream> sheets = r.getSheetsData();
        /**只读取第一个sheet*/
        if (sheets.hasNext()) {
            curRow = 0;
            sheetIndex++;
            InputStream sheet = sheets.next();
            InputSource sheetSource = new InputSource(sheet);
            parser.parse(sheetSource);
            sheet.close();
        }
    }

    /**
     * 读取所有工作簿的入口方法
     *
     * @param path
     * @throws Exception
     */
    public void readAllSheet(String path) throws OpenXML4JException, IOException, SAXException {
        OPCPackage pkg = OPCPackage.open(path);
        XSSFReader r = new XSSFReader(pkg);
        SharedStringsTable sst = r.getSharedStringsTable();

        XMLReader parser = fetchSheetParser(sst);

        //遍历所有sheet
        Iterator<InputStream> sheets = r.getSheetsData();
        while (sheets.hasNext()) {
            curRow = 0;
            sheetIndex++;
            InputStream sheet = sheets.next();
            InputSource sheetSource = new InputSource(sheet);
            parser.parse(sheetSource);
            sheet.close();
        }
        pkg.close();
    }

    /**
     * 该方法自动被调用,每读一行调用一次,在方法中写自己的业务逻辑即可
     *
     * @param sheetIndex 工作簿序号
     * @param curRow     处理到第几行
     * @param rowList    当前数据行的数据集合
     */
    public void optRow(int sheetIndex, int curRow, List<String> rowList) {
        if (curRow == 0) {
            //跳过表头
            return;
        }
        Map<Integer, String> colVal = new HashMap<>(rowList.size());
        for (int i = 0; i < rowList.size(); i++) {
            colVal.put(i, rowList.get(i));
        }
        dataMap.put(curRow, colVal);
        //System.out.println("read sheet:" + sheetIndex + ",row:" + curRow);
    }


    public XMLReader fetchSheetParser(SharedStringsTable sst) throws SAXException {
        XMLReader parser = XMLReaderFactory
                .createXMLReader("org.apache.xerces.parsers.SAXParser");
        this.sst = sst;
        parser.setContentHandler(this);
        return parser;
    }

    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
        // c => 单元格
        if (("c").equals(name)) {
            // 如果下一个元素是 SST 的索引,则将nextIsString标记为true
            String cellType = attributes.getValue("t");
            if (cellType != null && ("s").equals(cellType)) {
                nextIsString = true;
            } else {
                nextIsString = false;
            }
        }
        // 置空
        lastContents = "";
    }

    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        // 根据SST的索引值的到单元格的真正要存储的字符串
        // 这时characters()方法可能会被调用多次
        if (nextIsString) {
            try {
                int idx = Integer.parseInt(lastContents);
                lastContents = new XSSFRichTextString(sst.getEntryAt(idx))
                        .toString();
            } catch (Exception e) {

            }
        }

        // v => 单元格的值,如果单元格是字符串则v标签的值为该字符串在SST中的索引
        // 将单元格内容加入rowlist中,在这之前先去掉字符串前后的空白符
        if ("c".equals(name)) {
            String value = lastContents.trim();
            value = "".equals(value) ? " " : value;
            rowList.add(curCol, value);
            curCol++;
        } else {
            // 如果标签名称为 row ,这说明已到行尾,调用 optRows() 方法
            if ("row".equals(name)) {
                optRow(sheetIndex, curRow, rowList);
                rowList.clear();
                curRow++;
                curCol = 0;
            }
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        // 得到单元格内容的值(统一以string读取)
        lastContents += new String(ch, start, length);
    }
}
Clone this wiki locally