package com.css.java.learning.massbag;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import javax.imageio.IIOImage;
import javax.imageio.ImageIO;
import javax.imageio.ImageWriter;
import javax.imageio.stream.ImageOutputStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
/**
 * Converts a PDF to HTML: every page is rendered to a JPEG image and the
 * resulting image list is handed to {@code PptToHtml.createPPTHtml}
 * (presumably that helper writes the HTML page; it is not defined in this file).
 *
 * @author Red_Ant
 * 20180925
 */
public class PdfToHtml {

    /** Sub-folder (below the output path) that receives the rendered page images. */
    private static final String IMAGE_DIR = "20180925_pdf";

    /**
     * Renders each page of the PDF at {@code sourcePath} to
     * {@code outPath/20180925_pdf/<pageIndex>.jpg} and then calls
     * {@code PptToHtml.createPPTHtml(outPath, imgList, sourcePath)}.
     *
     * <p>Failures are printed to stderr and not rethrown; the HTML helper is
     * still invoked with whatever images were written successfully.
     *
     * @param sourcePath path of the PDF file to convert
     * @param outPath    directory under which the image folder is created
     */
    @SuppressWarnings("deprecation")
    private static void pdfToHtml(String sourcePath, String outPath) {
        List<String> imgList = new ArrayList<String>();
        PDDocument doc = null;
        ImageWriter writer = null;
        try {
            doc = PDDocument.load(sourcePath);
            int pageCount = doc.getPageCount();
            System.out.println("总共多少页?" + pageCount);

            // Create the image folder once, before any page is written.
            File folder = new File(outPath + File.separator + IMAGE_DIR);
            folder.mkdirs();

            // Look the JPEG writer up once and reuse it for every page.
            Iterator<ImageWriter> writers = ImageIO.getImageWritersBySuffix("jpg");
            if (!writers.hasNext()) {
                throw new IllegalStateException("No ImageIO writer available for suffix 'jpg'");
            }
            writer = writers.next();

            List<?> pages = doc.getDocumentCatalog().getAllPages();
            for (int i = 0; i < pages.size(); i++) {
                PDPage page = (PDPage) pages.get(i);
                BufferedImage image = page.convertToImage();

                String relativeName = IMAGE_DIR + File.separator + i + ".jpg";
                File outFile = new File(outPath + File.separator + relativeName);
                writePage(writer, image, outFile);

                // Only list images that were actually written.
                imgList.add(relativeName);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (writer != null) {
                writer.dispose();
            }
            if (doc != null) {
                try {
                    doc.close();
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
        PptToHtml.createPPTHtml(outPath, imgList, sourcePath);
    }

    /**
     * Writes one image to {@code outFile} with the given writer, closing both
     * the image stream and the underlying file stream afterwards.
     *
     * @param writer  image writer to use; its output is reset to {@code null} on return
     * @param image   rendered page image
     * @param outFile destination file (created or overwritten)
     * @throws java.io.IOException if the file cannot be opened or the image cannot be written
     */
    private static void writePage(ImageWriter writer, BufferedImage image, File outFile)
            throws java.io.IOException {
        FileOutputStream out = new FileOutputStream(outFile);
        try {
            ImageOutputStream outImage = ImageIO.createImageOutputStream(out);
            if (outImage == null) {
                throw new java.io.IOException("Cannot create image output stream for " + outFile);
            }
            try {
                writer.setOutput(outImage);
                writer.write(new IIOImage(image, null, null));
            } finally {
                writer.setOutput(null);
                // Closing the image stream flushes cached bytes to the file stream;
                // it does not close the file stream itself, so that is done below.
                outImage.close();
            }
        } finally {
            out.close();
        }
    }

    /**
     * Entry point. With two arguments, converts {@code args[0]} (PDF path) into
     * {@code args[1]} (output directory); otherwise falls back to the built-in
     * sample paths.
     *
     * @param args optional: source PDF path and output directory
     */
    public static void main(String[] args) {
        if (args != null && args.length >= 2) {
            pdfToHtml(args[0], args[1]);
            return;
        }
        pdfToHtml("D:\\red_ant_file\\20180925\\20180925_pdf\\西点烘焙百科全书电子书.pdf",
                "D:\\red_ant_file\\20180925\\20180925_pdf");
    }
}
// Implementation result
// Original article: http://blog.51cto.com/13479739/2285600