[整理]poi读取word 2007批注信息

时间：2014-11-20 02:15:05 阅读：1298 评论：0 收藏：0 [点我收藏+]

本文非原创，只是整理了下代码，原代码出自：http://blog.chiefleo.me/archives/429 。原文如下：

bubuko.com,布布扣
普通的读取批注信息方法:


/**
 * Prints the id, text and author of every comment in a .docx file to standard output.
 *
 * <p>This simple variant only reports the comments themselves; it does not report the
 * body text each comment is attached to.
 *
 * <p>Any failure while opening or reading the file is reported with
 * {@code printStackTrace()} and otherwise ignored.
 *
 * @param fileName path of the .docx file to read
 */
public void readWordDocxComments(String fileName) {
	XWPFDocument document = null;
	try {
		document = new XWPFDocument(POIXMLDocument.openPackage(fileName));
		for (XWPFComment comment : document.getComments()) {
			System.out.println("Id= " + comment.getId());
			System.out.println("Text= " + comment.getText());
			System.out.println("Author= " + comment.getAuthor());
		}
	} catch (Exception e) {
		e.printStackTrace();
	} finally {
		if (document != null) {
			// Release the underlying file. revert() closes the package WITHOUT
			// saving, which is what a read-only pass over the document wants.
			document.getPackage().revert();
		}
	}
}

上述方法不能获取批注对应的正文信息，修改后的代码如下:


import java.io.File;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.xwpf.usermodel.XWPFComment;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFRelation;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument.Factory;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Reads the comments of a Word 2007+ (.docx) document together with the body text that
 * each comment is attached to.
 *
 * <p>The comments themselves come from {@link XWPFDocument#getComments()}. The annotated
 * body text is recovered by walking the DOM of the main document part and collecting
 * everything between matching {@code w:commentRangeStart} / {@code w:commentRangeEnd}
 * markers.
 *
 * <p>The instance keeps the document package open until {@link #close()} is called.
 * Instances are not designed for concurrent use (the exposed {@link SimpleDateFormat}
 * is not thread-safe).
 */
public class POI_读取批注_S4_Test {
	private final File file;
	/** Word document; {@code null} once {@link #close()} has been called. */
	private XWPFDocument docx;
	/** Comments declared in the document. */
	private XWPFComment[] comments;
	/** Annotated body text per comment, structure: comment id -> body text. */
	private Map<String, String> commentRefs;
	/** Date format used to print comment dates. */
	private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");

	/**
	 * Opens the given Word file and extracts its comments and the text they annotate.
	 *
	 * @param filePath path of the Word (.docx) file
	 * @throws Exception if the file cannot be opened or parsed
	 */
	public POI_读取批注_S4_Test(String filePath) throws Exception {
		file = new File(filePath);
		initAttributes();
	}

	/**
	 * Initialises the member variables: opens the document, reads the comment list and
	 * builds the comment id -> annotated text map.
	 *
	 * @throws Exception wrapping the original failure when the file cannot be read
	 */
	private void initAttributes() throws Exception {
		try {
			docx = new XWPFDocument(POIXMLDocument.openPackage(file
					.getCanonicalPath()));
			comments = docx.getComments();
			commentRefs = new HashMap<String, String>();
			// Without comments there is nothing to resolve, so skip the DOM walk.
			if (comments.length > 0) {
				fillCommentRef(docx.getDocument().getDomNode(),
						new HashMap<String, StringBuilder>(), commentRefs);
			}
		} catch (Exception e) {
			// Release the package if the document object was already created.
			close();
			throw new Exception(new StringBuilder().append("Word文件格式错误")
					.append("-").append(e.getMessage()).toString(), e);
		}
	}

	/**
	 * Releases the underlying document package without saving anything back to the
	 * file. Calling it more than once is harmless. After closing,
	 * {@link #getSubmitDateList()} can no longer be used; the already extracted
	 * comments and comment references stay available.
	 */
	public void close() {
		if (docx != null) {
			docx.getPackage().revert();
			docx = null;
		}
	}

	/**
	 * Returns the comments of the document.
	 *
	 * @return the internal comment array (not a copy)
	 */
	public XWPFComment[] getComments() {
		return comments;
	}

	/**
	 * Returns the annotated body text per comment.
	 *
	 * @return the internal map (not a copy), structure: comment id -> body text
	 */
	public Map<String, String> getCommentRefs() {
		return commentRefs;
	}

	/**
	 * Returns the date format used for comment dates ({@code yyyy-MM-dd}).
	 *
	 * @return the formatter owned by this instance
	 */
	public SimpleDateFormat getSdf() {
		return sdf;
	}

	/**
	 * Returns the creation date of every comment, in the same order as
	 * {@link #getComments()}.
	 *
	 * <p>The dates are read from the comments part of the package. This is a
	 * best-effort lookup: an entry is {@code null} when the comment carries no date or
	 * when the comments part could not be read.
	 *
	 * @return one entry per comment; entries may be {@code null}
	 * @throws IllegalStateException if {@link #close()} has already been called
	 */
	public List<Date> getSubmitDateList() {
		if (docx == null) {
			throw new IllegalStateException("document has already been closed");
		}
		Map<String, Date> dateMap = new HashMap<String, Date>();
		try {
			for (POIXMLDocumentPart part : docx.getRelations()) {
				String relation = part.getPackageRelationship()
						.getRelationshipType();
				if (!XWPFRelation.COMMENT.getRelation().equals(relation)) {
					continue;
				}
				InputStream in = part.getPackagePart().getInputStream();
				try {
					CommentsDocument cmntdoc = Factory.parse(in);
					List<CTComment> commentList = cmntdoc.getComments()
							.getCommentList();
					for (CTComment ctcomment : commentList) {
						// A comment without id or date must not abort the whole
						// lookup; it simply gets no entry.
						if (ctcomment.getId() != null
								&& ctcomment.getDate() != null) {
							dateMap.put(ctcomment.getId().toString(), ctcomment
									.getDate().getTime());
						}
					}
				} finally {
					in.close();
				}
			}
		} catch (Exception ignored) {
			// Deliberately best-effort: dates collected so far are kept and the
			// remaining comments are reported with a null date.
		}
		List<Date> dateList = new ArrayList<Date>(comments.length);
		for (XWPFComment comment : comments) {
			dateList.add(dateMap.get(comment.getId()));
		}
		return dateList;
	}

	/**
	 * Returns the (trimmed) author of every comment, in the same order as
	 * {@link #getComments()}.
	 *
	 * @return one entry per comment; an empty string when a comment has no author
	 */
	public List<String> getSubmitterList() {
		List<String> list = new ArrayList<String>(comments.length);
		for (XWPFComment comment : comments) {
			String author = comment.getAuthor();
			list.add(author == null ? "" : author.trim());
		}
		return list;
	}

	/**
	 * Recursively walks the DOM below {@code node} and fills {@code map} with the body
	 * text annotated by each comment, structure: comment id -> text.
	 *
	 * <p>The annotated text lives in the main document part (document.xml; rename
	 * xx.docx to xx.zip to inspect it). Annotated text looks like
	 * {@code <w:commentRangeStart w:id="0"/> ... <w:t>body text</w:t> ...
	 * <w:commentRangeEnd w:id="0"/>}; an annotated picture looks like
	 * {@code <w:commentRangeStart w:id="1"/> <wp:docPr id="1" name="xxx"
	 * descr="yyy.png"/> <w:commentRangeEnd w:id="1"/>}. According to the original
	 * author, {@code wp:docPr} has no attributes when a .doc file was merely saved as
	 * .docx; in that case the picture shows up as {@code [null]}.
	 *
	 * <ol>
	 * <li>{@code w:commentRangeStart} opens a range for its {@code w:id}.</li>
	 * <li>While at least one range is open, the text of every {@code w:t} is appended
	 * to all open ranges, and every {@code wp:docPr} is appended as
	 * {@code [name]} to mark a picture.</li>
	 * <li>{@code w:commentRangeEnd} closes the range with the same id and stores its
	 * text in {@code map}. Several ranges may be open at once, so overlapping or
	 * nested comment ranges are each resolved.</li>
	 * <li>As soon as every comment of the document has an entry in {@code map} the
	 * walk stops, which avoids scanning the rest of a large document.</li>
	 * </ol>
	 *
	 * <p>NOTE(review): elements are matched by their qualified names with the literal
	 * prefixes {@code w:} and {@code wp:}; a document using other prefixes for these
	 * namespaces would not be recognised.
	 *
	 * @param node       WordprocessingML node to process (including its descendants)
	 * @param openRanges currently open comment ranges, comment id -> text so far
	 * @param map        target map to fill
	 * @return {@code true} when all comments are resolved and the walk should stop
	 * @throws IllegalArgumentException if any argument is {@code null}
	 */
	private boolean fillCommentRef(Node node,
			Map<String, StringBuilder> openRanges, Map<String, String> map) {
		if (!insureNotNull(node, openRanges, map)) {
			throw new IllegalArgumentException(new StringBuilder()
					.append(this.getClass().getName())
					.append(".fillCommentRef(").append(node).append(",")
					.append(openRanges).append(",").append(map).append(")")
					.toString());
		}
		String nodeName = node.getNodeName();
		if ("w:t".equals(nodeName)) {
			if (!openRanges.isEmpty()) {
				appendToOpenRanges(openRanges, textOf(node));
			}
		} else if ("wp:docPr".equals(nodeName)) {
			if (!openRanges.isEmpty()) {
				appendToOpenRanges(openRanges,
						"[" + getAttribute(node, "name") + "]");
			}
		} else if ("w:commentRangeStart".equals(nodeName)) {
			String id = getAttribute(node, "w:id");
			if (id != null) {
				openRanges.put(id, new StringBuilder());
			}
		} else if ("w:commentRangeEnd".equals(nodeName)) {
			String id = getAttribute(node, "w:id");
			// An end marker without a matching start marker is ignored.
			StringBuilder text = (id == null) ? null : openRanges.remove(id);
			if (text != null) {
				map.put(id, text.toString());
				if (allCommentsResolved(map)) {
					return true;
				}
			}
		}
		if (node.hasChildNodes()) {
			NodeList children = node.getChildNodes();
			for (int i = 0; i < children.getLength(); i++) {
				if (fillCommentRef(children.item(i), openRanges, map)) {
					return true;
				}
			}
		}
		return false;
	}

	/** Appends {@code text} to every currently open comment range. */
	private static void appendToOpenRanges(
			Map<String, StringBuilder> openRanges, String text) {
		for (StringBuilder range : openRanges.values()) {
			range.append(text);
		}
	}

	/**
	 * Returns the concatenated text/CDATA children of {@code element}; an empty string
	 * for an element without text (for example an empty {@code <w:t/>}).
	 */
	private static String textOf(Node element) {
		StringBuilder text = new StringBuilder();
		for (Node child = element.getFirstChild(); child != null; child = child
				.getNextSibling()) {
			short type = child.getNodeType();
			if ((type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE)
					&& child.getNodeValue() != null) {
				text.append(child.getNodeValue());
			}
		}
		return text.toString();
	}

	/** Returns whether {@code map} holds an entry for every comment of the document. */
	private boolean allCommentsResolved(Map<String, String> map) {
		if (map.size() < comments.length) {
			// Cheap pre-check; with fewer entries than comments some are still open.
			return false;
		}
		for (XWPFComment comment : comments) {
			if (!map.containsKey(comment.getId())) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Reads an attribute of a node.
	 *
	 * @param node
	 *            the current node
	 * @param attName
	 *            name of the attribute to read
	 * @return the attribute value, or {@code null} when the node has no such attribute
	 */
	private static String getAttribute(Node node, String attName) {
		if (!node.hasAttributes()) {
			return null;
		}
		Node attribute = node.getAttributes().getNamedItem(attName);
		return attribute == null ? null : attribute.getNodeValue();
	}

	/**
	 * Checks that none of the given arguments is {@code null}.
	 *
	 * @param objects arguments to check
	 * @return {@code true} if all arguments are non-null, otherwise {@code false}
	 */
	private boolean insureNotNull(Object... objects) {
		for (Object object : objects) {
			if (object == null) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Demo entry point: prints id, author, date, text and annotated body text of every
	 * comment.
	 *
	 * @param args optional; {@code args[0]} is the path of the .docx file. Without
	 *             arguments the original sample path is used.
	 * @throws Exception if the document cannot be read
	 */
	public static void main(String[] args) throws Exception {
		String path = (args != null && args.length > 0) ? args[0]
				: "f:/saveFile/temp/sys_comment_07.docx";
		StringBuilder value = new StringBuilder();
		POI_读取批注_S4_Test wh = new POI_读取批注_S4_Test(path);
		try {
			XWPFComment[] comments = wh.getComments();
			Map<String, String> commenRefMap = wh.getCommentRefs();
			List<Date> l = wh.getSubmitDateList();
			SimpleDateFormat sdf = wh.getSdf();
			for (int i = 0; i < comments.length; i++) {
				XWPFComment comment = comments[i];
				Date date = l.get(i);
				value.append("批注Id:").append(comment.getId()).append(", ")
						.append("批注作者:").append(comment.getAuthor()).append(", ")
						// The date is null when the comment has none; print nothing then.
						.append("批注日期:").append(date == null ? "" : sdf.format(date)).append(", ")
						.append("批注内容:").append(comment.getText()).append(", ")
						.append("批注引用正文:")
						.append(commenRefMap.get(comment.getId()));
				value.append("\n");
			}
		} finally {
			wh.close();
		}
		System.out.println(value);
	}
}

结果为:

bubuko.com,布布扣
全文完。

[整理]poi读取word 2007批注信息

原文：http://53873039oycg.iteye.com/blog/2157927

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)